[Pkg-opencl-devel] [beignet] 02/66: Imported Upstream version 0.0.0+git2013.04.01+d1b234c
Andreas Beckmann
anbe at moszumanska.debian.org
Fri Oct 31 07:27:00 UTC 2014
This is an automated email from the git hooks/post-receive script.
anbe pushed a commit to branch master
in repository beignet.
commit e82d6167fd9cdfe17cac402a330e4d196969b0db
Author: Simon Richter <sjr at debian.org>
Date: Tue Apr 9 17:14:00 2013 +0200
Imported Upstream version 0.0.0+git2013.04.01+d1b234c
---
.gitignore | 2 +
CMake/CMakeConfigTemplate.hpp | 28 +
CMake/FindCPP.cmake | 25 +
CMake/FindDRM.cmake | 36 +
CMake/FindDRMIntel.cmake | 29 +
CMake/FindFlex.cmake | 27 +
CMake/FindGBE.cmake | 36 +
CMake/FindXLib.cmake | 34 +
CMake/FindXext.cmake | 27 +
CMake/FindXfixes.cmake | 27 +
CMakeLists.txt | 104 +
COPYING | 502 ++
Makefile | 13 +
Makefile.defs | 45 +
Makefile.lib | 24 +
Makefile.shared | 15 +
README.md | 133 +
backend/.gitignore | 3 +
backend/CMakeLists.txt | 97 +
backend/Makefile | 4 +
backend/Makefile.defs | 25 +
backend/Makefile.lib | 23 +
backend/Makefile.shared | 15 +
backend/README.md | 57 +
backend/doc/TODO.md | 120 +
backend/doc/compiler_backend.md | 112 +
backend/doc/flat_address_space.md | 101 +
backend/doc/gen_ir.md | 256 +
backend/doc/unstructured_branches.md | 274 +
backend/src/CMakeLists.txt | 114 +
backend/src/Makefile | 4 +
backend/src/all-in-one/blob.cpp | 59 +
backend/src/backend/Makefile | 4 +
backend/src/backend/context.cpp | 545 ++
backend/src/backend/context.hpp | 130 +
backend/src/backend/gen/Makefile | 4 +
backend/src/backend/gen/gen_mesa_disasm.c | 1146 +++
backend/src/backend/gen/gen_mesa_disasm.h | 45 +
backend/src/backend/gen_context.cpp | 286 +
backend/src/backend/gen_context.hpp | 114 +
backend/src/backend/gen_defs.hpp | 757 ++
backend/src/backend/gen_encoder.cpp | 846 ++
backend/src/backend/gen_encoder.hpp | 177 +
.../src/backend/gen_insn_gen7_schedule_info.hxx | 17 +
backend/src/backend/gen_insn_scheduling.cpp | 597 ++
backend/src/backend/gen_insn_scheduling.hpp | 42 +
backend/src/backend/gen_insn_selection.cpp | 2049 +++++
backend/src/backend/gen_insn_selection.hpp | 215 +
backend/src/backend/gen_insn_selection.hxx | 35 +
backend/src/backend/gen_program.cpp | 122 +
backend/src/backend/gen_program.h | 37 +
backend/src/backend/gen_program.hpp | 69 +
backend/src/backend/gen_reg_allocation.cpp | 713 ++
backend/src/backend/gen_reg_allocation.hpp | 62 +
backend/src/backend/gen_register.hpp | 787 ++
backend/src/backend/program.cpp | 277 +
backend/src/backend/program.h | 175 +
backend/src/backend/program.hpp | 158 +
backend/src/ir/Makefile | 4 +
backend/src/ir/constant.cpp | 45 +
backend/src/ir/constant.hpp | 79 +
backend/src/ir/context.cpp | 178 +
backend/src/ir/context.hpp | 214 +
backend/src/ir/function.cpp | 316 +
backend/src/ir/function.hpp | 307 +
backend/src/ir/immediate.hpp | 88 +
backend/src/ir/instruction.cpp | 1254 +++
backend/src/ir/instruction.hpp | 521 ++
backend/src/ir/instruction.hxx | 71 +
backend/src/ir/liveness.cpp | 109 +
backend/src/ir/liveness.hpp | 134 +
backend/src/ir/lowering.cpp | 380 +
backend/src/ir/lowering.hpp | 94 +
backend/src/ir/profile.cpp | 81 +
backend/src/ir/profile.hpp | 76 +
backend/src/ir/register.cpp | 63 +
backend/src/ir/register.hpp | 158 +
backend/src/ir/type.cpp | 49 +
backend/src/ir/type.hpp | 95 +
backend/src/ir/unit.cpp | 62 +
backend/src/ir/unit.hpp | 85 +
backend/src/ir/value.cpp | 594 ++
backend/src/ir/value.hpp | 266 +
backend/src/llvm/CMakeLists.txt | 19 +
backend/src/llvm/Makefile | 3 +
backend/src/llvm/llvm_gen_backend.cpp | 1980 +++++
backend/src/llvm/llvm_gen_backend.hpp | 61 +
backend/src/llvm/llvm_gen_ocl_function.hxx | 42 +
backend/src/llvm/llvm_passes.cpp | 354 +
backend/src/llvm/llvm_to_gen.cpp | 96 +
backend/src/llvm/llvm_to_gen.hpp | 39 +
backend/src/ocl_stdlib.h | 469 ++
backend/src/ocl_stdlib_str.cpp | 475 ++
backend/src/sys/Makefile | 3 +
backend/src/sys/alloc.cpp | 359 +
backend/src/sys/alloc.hpp | 341 +
backend/src/sys/assert.cpp | 81 +
backend/src/sys/assert.hpp | 35 +
backend/src/sys/atomic.hpp | 56 +
backend/src/sys/cvar.cpp | 65 +
backend/src/sys/cvar.hpp | 80 +
backend/src/sys/exception.hpp | 56 +
backend/src/sys/fixed_array.hpp | 84 +
backend/src/sys/hash_map.hpp | 82 +
backend/src/sys/intrinsics.hpp | 209 +
backend/src/sys/intrusive_list.cpp | 66 +
backend/src/sys/intrusive_list.hpp | 176 +
backend/src/sys/list.hpp | 65 +
backend/src/sys/map.hpp | 75 +
backend/src/sys/mutex.cpp | 48 +
backend/src/sys/mutex.hpp | 74 +
backend/src/sys/platform.cpp | 79 +
backend/src/sys/platform.hpp | 390 +
backend/src/sys/set.hpp | 70 +
backend/src/sys/vector.hpp | 79 +
include/CL/cl.h | 994 +++
include/CL/cl_d3d10.h | 129 +
include/CL/cl_d3d9.h | 98 +
include/CL/cl_ext.h | 209 +
include/CL/cl_gl.h | 151 +
include/CL/cl_intel.h | 66 +
include/CL/cl_platform.h | 1194 +++
include/CL/glext.h | 8662 ++++++++++++++++++++
include/CL/opencl.h | 50 +
kernels/compiler_argument_structure.cl | 9 +
kernels/compiler_argument_structure_indirect.cl | 9 +
kernels/compiler_array.cl | 14 +
kernels/compiler_array0.cl | 16 +
kernels/compiler_array1.cl | 15 +
kernels/compiler_array2.cl | 13 +
kernels/compiler_array3.cl | 14 +
kernels/compiler_box_blur.cl | 113 +
kernels/compiler_box_blur_float.cl | 48 +
kernels/compiler_box_blur_float_ref.bmp | Bin 0 -> 49206 bytes
kernels/compiler_box_blur_ref.bmp | Bin 0 -> 49206 bytes
kernels/compiler_byte_scatter.cl | 7 +
kernels/compiler_chocolux.cl | 64 +
kernels/compiler_chocolux_ref.bmp | Bin 0 -> 196662 bytes
kernels/compiler_clod.cl | 91 +
kernels/compiler_clod_ref.bmp | Bin 0 -> 196662 bytes
kernels/compiler_function_argument.cl | 7 +
kernels/compiler_function_argument0.cl | 7 +
kernels/compiler_function_argument1.cl | 7 +
kernels/compiler_gather_register_file.cl | 10 +
kernels/compiler_gather_register_file0.cl | 10 +
kernels/compiler_gather_register_file1.cl | 11 +
kernels/compiler_if_else.cl | 14 +
kernels/compiler_insert_to_constant.cl | 6 +
kernels/compiler_insn_selection_masked_min_max.cl | 11 +
kernels/compiler_insn_selection_max.cl | 7 +
kernels/compiler_insn_selection_min.cl | 7 +
kernels/compiler_julia.cl | 146 +
kernels/compiler_julia_no_break.cl | 147 +
kernels/compiler_julia_no_break_ref.bmp | Bin 0 -> 196662 bytes
kernels/compiler_julia_ref.bmp | Bin 0 -> 196662 bytes
kernels/compiler_local_memory.cl | 5 +
kernels/compiler_local_memory_barrier.cl | 6 +
kernels/compiler_local_memory_barrier_wg64.cl | 6 +
kernels/compiler_local_memory_two_ptr.cl | 9 +
kernels/compiler_local_slm.cl | 10 +
kernels/compiler_lower_return0.cl | 8 +
kernels/compiler_lower_return1.cl | 8 +
kernels/compiler_lower_return2.cl | 11 +
kernels/compiler_mandelbrot.cl | 47 +
kernels/compiler_mandelbrot_alternate.cl | 38 +
kernels/compiler_mandelbrot_alternate_ref.bmp | Bin 0 -> 196662 bytes
kernels/compiler_mandelbrot_ref.bmp | Bin 0 -> 196662 bytes
kernels/compiler_math.cl | 14 +
kernels/compiler_menger_sponge.cl | 189 +
kernels/compiler_menger_sponge_no_shadow.cl | 125 +
kernels/compiler_menger_sponge_no_shadow_ref.bmp | Bin 0 -> 196662 bytes
kernels/compiler_menger_sponge_ref.bmp | Bin 0 -> 196662 bytes
kernels/compiler_nautilus.cl | 68 +
kernels/compiler_nautilus_ref.bmp | Bin 0 -> 196662 bytes
kernels/compiler_obread.cl | 8 +
kernels/compiler_obwrite.cl | 8 +
kernels/compiler_region.cl | 10 +
kernels/compiler_region0.cl | 11 +
kernels/compiler_region1.cl | 9 +
kernels/compiler_ribbon.cl | 89 +
kernels/compiler_ribbon_ref.bmp | Bin 0 -> 196662 bytes
kernels/compiler_short_scatter.cl | 7 +
kernels/compiler_sub_bytes.cl | 7 +
kernels/compiler_sub_shorts.cl | 7 +
kernels/compiler_switch.cl | 14 +
kernels/compiler_uint16_copy.cl | 8 +
kernels/compiler_uint2_copy.cl | 7 +
kernels/compiler_uint3_copy.cl | 7 +
kernels/compiler_uint3_unaligned_copy.cl | 8 +
kernels/compiler_uint8_copy.cl | 7 +
kernels/compiler_unstructured_branch0.cl | 14 +
kernels/compiler_unstructured_branch1.cl | 14 +
kernels/compiler_unstructured_branch2.cl | 18 +
kernels/compiler_unstructured_branch3.cl | 16 +
kernels/compiler_vote_all.cl | 10 +
kernels/compiler_vote_any.cl | 10 +
kernels/compiler_write_only_bytes.cl | 7 +
kernels/compiler_write_only_shorts.cl | 7 +
kernels/lenna128x128.bmp | Bin 0 -> 49206 bytes
kernels/test_copy_buffer.cl | 7 +
kernels/test_copy_buffer_row.cl | 9 +
kernels/test_write_only.cl | 7 +
setup_fulsim_hsw.sh | 5 +
setup_fulsim_ivb.sh | 5 +
setup_perfsim_ivb.sh | 4 +
src/CMakeLists.txt | 40 +
src/Makefile | 4 +
src/cl_alloc.c | 87 +
src/cl_alloc.h | 47 +
src/cl_api.c | 1184 +++
src/cl_command_queue.c | 372 +
src/cl_command_queue.h | 73 +
src/cl_command_queue_gen7.c | 248 +
src/cl_context.c | 203 +
src/cl_context.h | 86 +
src/cl_device_data.h | 95 +
src/cl_device_id.c | 274 +
src/cl_device_id.h | 122 +
src/cl_driver.cpp | 40 +
src/cl_driver.h | 226 +
src/cl_driver_defs.c | 60 +
src/cl_event.c | 20 +
src/cl_event.h | 27 +
src/cl_gen75_device.h | 29 +
src/cl_gen7_device.h | 28 +
src/cl_gt_device.h | 77 +
src/cl_image.c | 214 +
src/cl_image.h | 44 +
src/cl_internals.h | 35 +
src/cl_kernel.c | 244 +
src/cl_kernel.h | 95 +
src/cl_mem.c | 393 +
src/cl_mem.h | 79 +
src/cl_platform_id.c | 119 +
src/cl_platform_id.h | 42 +
src/cl_program.c | 330 +
src/cl_program.h | 99 +
src/cl_sampler.c | 93 +
src/cl_sampler.h | 51 +
src/cl_utils.h | 256 +
src/intel/Makefile | 4 +
src/intel/intel_batchbuffer.c | 177 +
src/intel/intel_batchbuffer.h | 147 +
src/intel/intel_defines.h | 305 +
src/intel/intel_driver.c | 399 +
src/intel/intel_driver.h | 121 +
src/intel/intel_gpgpu.c | 668 ++
src/intel/intel_gpgpu.h | 34 +
src/intel/intel_structs.h | 434 +
src/x11/Makefile | 4 +
src/x11/dricommon.c | 329 +
src/x11/dricommon.h | 99 +
src/x11/va_dri2.c | 327 +
src/x11/va_dri2.h | 89 +
src/x11/va_dri2str.h | 211 +
src/x11/va_dri2tokens.h | 66 +
utests/CMakeLists.txt | 65 +
utests/Makefile | 78 +
utests/compiler_argument_structure.cpp | 28 +
utests/compiler_argument_structure_indirect.cpp | 29 +
utests/compiler_array.cpp | 28 +
utests/compiler_array0.cpp | 54 +
utests/compiler_array1.cpp | 52 +
utests/compiler_array2.cpp | 50 +
utests/compiler_array3.cpp | 51 +
utests/compiler_box_blur.cpp | 43 +
utests/compiler_box_blur_float.cpp | 65 +
utests/compiler_byte_scatter.cpp | 24 +
utests/compiler_copy_buffer.cpp | 32 +
utests/compiler_copy_buffer_row.cpp | 40 +
utests/compiler_function_argument.cpp | 27 +
utests/compiler_function_argument0.cpp | 26 +
utests/compiler_function_argument1.cpp | 31 +
utests/compiler_if_else.cpp | 64 +
utests/compiler_insert_to_constant.cpp | 30 +
utests/compiler_insn_selection_masked_min_max.cpp | 42 +
utests/compiler_insn_selection_max.cpp | 37 +
utests/compiler_insn_selection_min.cpp | 36 +
utests/compiler_local_memory.cpp | 47 +
utests/compiler_local_memory_barrier.cpp | 46 +
utests/compiler_local_memory_barrier_wg64.cpp | 46 +
utests/compiler_local_memory_two_ptr.cpp | 50 +
utests/compiler_local_slm.cpp | 10 +
utests/compiler_lower_return0.cpp | 54 +
utests/compiler_lower_return1.cpp | 47 +
utests/compiler_lower_return2.cpp | 48 +
utests/compiler_mandelbrot.cpp | 48 +
utests/compiler_mandelbrot_alternate.cpp | 54 +
utests/compiler_math.cpp | 55 +
utests/compiler_shader_toy.cpp | 82 +
utests/compiler_short_scatter.cpp | 25 +
utests/compiler_sub_bytes.cpp | 35 +
utests/compiler_sub_shorts.cpp | 36 +
utests/compiler_switch.cpp | 48 +
utests/compiler_uint16_copy.cpp | 35 +
utests/compiler_uint2_copy.cpp | 31 +
utests/compiler_uint3_copy.cpp | 40 +
utests/compiler_uint3_unaligned_copy.cpp | 42 +
utests/compiler_uint8_copy.cpp | 35 +
utests/compiler_unstructured_branch0.cpp | 55 +
utests/compiler_unstructured_branch1.cpp | 54 +
utests/compiler_unstructured_branch2.cpp | 68 +
utests/compiler_unstructured_branch3.cpp | 58 +
utests/compiler_write_only.cpp | 43 +
utests/compiler_write_only_bytes.cpp | 23 +
utests/compiler_write_only_shorts.cpp | 24 +
utests/runtime_flat_address_space.cpp | 75 +
utests/utest.cpp | 78 +
utests/utest.hpp | 88 +
utests/utest_assert.cpp | 42 +
utests/utest_assert.hpp | 44 +
utests/utest_error.c | 76 +
utests/utest_error.h | 26 +
utests/utest_exception.hpp | 48 +
utests/utest_file_map.cpp | 117 +
utests/utest_file_map.hpp | 84 +
utests/utest_helper.cpp | 482 ++
utests/utest_helper.hpp | 143 +
utests/utest_run.cpp | 44 +
319 files changed, 48822 insertions(+)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bae14b5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.o
+*.so*
diff --git a/CMake/CMakeConfigTemplate.hpp b/CMake/CMakeConfigTemplate.hpp
new file mode 100644
index 0000000..7702c54
--- /dev/null
+++ b/CMake/CMakeConfigTemplate.hpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef CMAKE_CONFIG_HPP
+#define CMAKE_CONFIG_HPP
+
+#define ON true
+#define OFF false
+#define GEN_INSTALLATION_PATH "${CMAKE_INSTALL_PREFIX}/lib/i965/"
+
+#endif /* CMAKE_CONFIG_HPP */
+
diff --git a/CMake/FindCPP.cmake b/CMake/FindCPP.cmake
new file mode 100644
index 0000000..be05064
--- /dev/null
+++ b/CMake/FindCPP.cmake
@@ -0,0 +1,25 @@
+#
+# Try to find the C preprocessor.
+# Once done this will define
+#
+# CPP_FOUND
+# CPP_PATH
+#
+
+FIND_PROGRAM(CPP_PATH cpp
+ /bin/
+ /usr/bin/
+ /usr/local/bin
+ ~/bin
+ /opt/local/bin/
+ DOC "The directory where cpp is")
+
+IF(CPP_PATH)
+ SET(CPP_FOUND 1 CACHE STRING "Set to 1 if CPP is found, 0 otherwise")
+ELSE(CPP_PATH)
+ SET(CPP_FOUND 0 CACHE STRING "Set to 1 if CPP is found, 0 otherwise")
+ENDIF(CPP_PATH)
+
+MARK_AS_ADVANCED(CPP_FOUND)
+
+
diff --git a/CMake/FindDRM.cmake b/CMake/FindDRM.cmake
new file mode 100644
index 0000000..f65c457
--- /dev/null
+++ b/CMake/FindDRM.cmake
@@ -0,0 +1,36 @@
+#
+# Try to find the DRM library and include path.
+# Once done this will define
+#
+# DRM_FOUND
+# DRM_INCLUDE_PATH
+# DRM_LIBRARY
+#
+
+FIND_PATH(DRM_INCLUDE_PATH drm.h
+ ~/include/libdrm/
+ /usr/include/libdrm/
+ /usr/local/include/libdrm/
+ /sw/include/libdrm/
+ /opt/local/include/libdrm/
+ DOC "The directory where drm.h resides")
+FIND_LIBRARY(DRM_LIBRARY
+ NAMES DRM drm
+ PATHS
+ ~/lib/
+ /usr/lib64
+ /usr/lib
+ /usr/local/lib64
+ /usr/local/lib
+ /sw/lib
+ /opt/local/lib
+ DOC "The DRM library")
+
+IF(DRM_INCLUDE_PATH)
+ SET(DRM_FOUND 1 CACHE STRING "Set to 1 if DRM is found, 0 otherwise")
+ELSE(DRM_INCLUDE_PATH)
+ SET(DRM_FOUND 0 CACHE STRING "Set to 1 if DRM is found, 0 otherwise")
+ENDIF(DRM_INCLUDE_PATH)
+
+MARK_AS_ADVANCED(DRM_FOUND)
+
diff --git a/CMake/FindDRMIntel.cmake b/CMake/FindDRMIntel.cmake
new file mode 100644
index 0000000..0577882
--- /dev/null
+++ b/CMake/FindDRMIntel.cmake
@@ -0,0 +1,29 @@
+#
+# Try to find the Intel DRM (drm_intel) library path.
+# Once done this will define
+#
+# DRM_INTEL_FOUND
+# DRM_INTEL_LIBRARY
+#
+
+FIND_LIBRARY(DRM_INTEL_LIBRARY
+ NAMES DRM_INTEL drm_intel
+ PATHS
+ ~/lib/
+ /usr/lib64
+ /usr/lib
+ /usr/local/lib64
+ /usr/local/lib
+ /sw/lib
+ /opt/local/lib
+ /usr/lib/i386-linux-gnu/
+ DOC "The DRM_INTEL library")
+
+IF(DRM_INTEL_LIBRARY)
+  SET(DRM_INTEL_FOUND 1 CACHE STRING "Set to 1 if DRM_INTEL is found, 0 otherwise")
+ELSE(DRM_INTEL_LIBRARY)
+  SET(DRM_INTEL_FOUND 0 CACHE STRING "Set to 1 if DRM_INTEL is found, 0 otherwise")
+ENDIF(DRM_INTEL_LIBRARY)
+
+MARK_AS_ADVANCED(DRM_INTEL_FOUND)
+
diff --git a/CMake/FindFlex.cmake b/CMake/FindFlex.cmake
new file mode 100644
index 0000000..f7972eb
--- /dev/null
+++ b/CMake/FindFlex.cmake
@@ -0,0 +1,27 @@
+#
+# Try to find the flex library path.
+# Once done this will define
+#
+# FLEX_FOUND
+# FLEX_LIBRARY
+#
+
+FIND_LIBRARY(FLEX_LIBRARY
+ NAMES FLEX fl
+ PATHS
+ /usr/lib64
+ /usr/lib
+ /usr/local/lib64
+ /usr/local/lib
+ /sw/lib
+ /opt/local/lib
+ DOC "The FLEX library")
+
+IF(FLEX_LIBRARY)
+  SET(FLEX_FOUND 1 CACHE STRING "Set to 1 if FLEX is found, 0 otherwise")
+ELSE(FLEX_LIBRARY)
+  SET(FLEX_FOUND 0 CACHE STRING "Set to 1 if FLEX is found, 0 otherwise")
+ENDIF(FLEX_LIBRARY)
+
+MARK_AS_ADVANCED(FLEX_FOUND)
+
diff --git a/CMake/FindGBE.cmake b/CMake/FindGBE.cmake
new file mode 100644
index 0000000..4670483
--- /dev/null
+++ b/CMake/FindGBE.cmake
@@ -0,0 +1,36 @@
+#
+# Try to find the GBE (Gen backend) library and include path.
+# Once done this will define
+#
+# GBE_FOUND
+# GBE_INCLUDE_PATH
+# GBE_LIBRARY
+#
+
+FIND_PATH(GBE_INCLUDE_PATH gen/program.h
+ ~/include/
+ /usr/include/
+ /usr/local/include/
+ /sw/include/
+ /opt/local/include/
+ DOC "The directory where gen/program.h resides")
+FIND_LIBRARY(GBE_LIBRARY
+ NAMES GBE gbe
+ PATHS
+ ~/lib/
+ /usr/lib64
+ /usr/lib
+ /usr/local/lib64
+ /usr/local/lib
+ /sw/lib
+ /opt/local/lib
+ DOC "The GBE library")
+
+IF(GBE_INCLUDE_PATH)
+ SET(GBE_FOUND 1 CACHE STRING "Set to 1 if GBE is found, 0 otherwise")
+ELSE(GBE_INCLUDE_PATH)
+ SET(GBE_FOUND 0 CACHE STRING "Set to 1 if GBE is found, 0 otherwise")
+ENDIF(GBE_INCLUDE_PATH)
+
+MARK_AS_ADVANCED(GBE_FOUND)
+
diff --git a/CMake/FindXLib.cmake b/CMake/FindXLib.cmake
new file mode 100644
index 0000000..5047a0f
--- /dev/null
+++ b/CMake/FindXLib.cmake
@@ -0,0 +1,34 @@
+#
+# Try to find X library and include path.
+# Once done this will define
+#
+# XLIB_FOUND
+# XLIB_INCLUDE_PATH
+# XLIB_LIBRARY
+#
+
+FIND_PATH(XLIB_INCLUDE_PATH X11/Xlib.h
+ /usr/include
+ /usr/local/include
+ /sw/include
+ /opt/local/include
+ DOC "The directory where X11/Xlib.h resides")
+FIND_LIBRARY(XLIB_LIBRARY
+ NAMES XLIB X11
+ PATHS
+ /usr/lib64
+ /usr/lib
+ /usr/local/lib64
+ /usr/local/lib
+ /sw/lib
+ /opt/local/lib
+ DOC "The XLIB library")
+
+IF(XLIB_INCLUDE_PATH)
+ SET(XLIB_FOUND 1 CACHE STRING "Set to 1 if XLIB is found, 0 otherwise")
+ELSE(XLIB_INCLUDE_PATH)
+ SET(XLIB_FOUND 0 CACHE STRING "Set to 1 if XLIB is found, 0 otherwise")
+ENDIF(XLIB_INCLUDE_PATH)
+
+MARK_AS_ADVANCED(XLIB_FOUND)
+
diff --git a/CMake/FindXext.cmake b/CMake/FindXext.cmake
new file mode 100644
index 0000000..3f91370
--- /dev/null
+++ b/CMake/FindXext.cmake
@@ -0,0 +1,27 @@
+#
+# Try to find Xext library path.
+# Once done this will define
+#
+# XEXT_FOUND
+# XEXT_LIBRARY
+#
+
+FIND_LIBRARY(XEXT_LIBRARY
+ NAMES XEXT Xext
+ PATHS
+ /usr/lib64
+ /usr/lib
+ /usr/local/lib64
+ /usr/local/lib
+ /sw/lib
+ /opt/local/lib
+ DOC "The XEXT library")
+
+IF(XEXT_LIBRARY)
+  SET(XEXT_FOUND 1 CACHE STRING "Set to 1 if XEXT is found, 0 otherwise")
+ELSE(XEXT_LIBRARY)
+  SET(XEXT_FOUND 0 CACHE STRING "Set to 1 if XEXT is found, 0 otherwise")
+ENDIF(XEXT_LIBRARY)
+
+MARK_AS_ADVANCED(XEXT_FOUND)
+
diff --git a/CMake/FindXfixes.cmake b/CMake/FindXfixes.cmake
new file mode 100644
index 0000000..8bce1d6
--- /dev/null
+++ b/CMake/FindXfixes.cmake
@@ -0,0 +1,27 @@
+#
+# Try to find Xfixes library path.
+# Once done this will define
+#
+# XFIXES_FOUND
+# XFIXES_LIBRARY
+#
+
+FIND_LIBRARY(XFIXES_LIBRARY
+ NAMES XFIXES Xfixes
+ PATHS
+ /usr/lib64
+ /usr/lib
+ /usr/local/lib64
+ /usr/local/lib
+ /sw/lib
+ /opt/local/lib
+ DOC "The XFIXES library")
+
+IF(XFIXES_LIBRARY)
+  SET(XFIXES_FOUND 1 CACHE STRING "Set to 1 if XFIXES is found, 0 otherwise")
+ELSE(XFIXES_LIBRARY)
+  SET(XFIXES_FOUND 0 CACHE STRING "Set to 1 if XFIXES is found, 0 otherwise")
+ENDIF(XFIXES_LIBRARY)
+
+MARK_AS_ADVANCED(XFIXES_FOUND)
+
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..ea84071
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,104 @@
+#############################################################################
+# INTEL CORPORATION PROPRIETARY INFORMATION #
+# This software is supplied under the terms of a license agreement or #
+# nondisclosure agreement with Intel Corporation and may not be copied #
+# or disclosed except in accordance with the terms of that agreement. #
+# Copyright (C) 2009 Intel Corporation. All Rights Reserved. #
+#############################################################################
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0)
+PROJECT(OCL)
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
+
+SET(CMAKE_VERBOSE_MAKEFILE "false")
+SET(EMULATE_IVB false CACHE BOOL "To emulate IVB")
+SET(EMULATE_SNB false CACHE BOOL "To emulate SNB")
+SET(EMULATE_HSW false CACHE BOOL "To emulate HSW")
+ADD_DEFINITIONS(-D__$(USER)__)
+
+# Force Release with debug info
+if (NOT CMAKE_BUILD_TYPE)
+ set (CMAKE_BUILD_TYPE RelWithDebInfo)
+endif (NOT CMAKE_BUILD_TYPE)
+set (CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "assure config" FORCE)
+message(STATUS "Building mode: " ${CMAKE_BUILD_TYPE})
+
+IF (EMULATE_HSW)
+ SET (USE_FULSIM "true")
+ ADD_DEFINITIONS(-DEMULATE_GEN=75)
+ELSEIF (EMULATE_IVB)
+ SET (USE_FULSIM "true")
+ ADD_DEFINITIONS(-DEMULATE_GEN=7)
+ELSEIF (EMULATE_SNB)
+ SET (USE_FULSIM "true")
+ ADD_DEFINITIONS(-DEMULATE_GEN=6)
+ELSE (EMULATE_IVB)
+ SET (USE_FULSIM "false")
+ ADD_DEFINITIONS(-DEMULATE_GEN=0)
+ENDIF (EMULATE_HSW)
+
+IF (USE_FULSIM)
+ ADD_DEFINITIONS(-DUSE_FULSIM=1)
+ELSE (USE_FULSIM)
+ ADD_DEFINITIONS(-DUSE_FULSIM=0)
+ENDIF (USE_FULSIM)
+
+SET(CMAKE_CXX_FLAGS "-Wall -Wno-invalid-offsetof -mfpmath=sse --no-rtti -Wcast-align -std=c++0x")
+SET(CMAKE_C_FLAGS "-Wall -mfpmath=sse -msse2 -Wcast-align")
+
+# Front end stuff we need
+Find_Package(LLVM REQUIRED)
+
+# XLib
+INCLUDE(CMake/FindXLib.cmake)
+IF(XLIB_FOUND)
+ MESSAGE(STATUS "Looking for XLib - found")
+ELSE(XLIB_FOUND)
+ MESSAGE(STATUS "Looking for XLib - not found")
+ENDIF(XLIB_FOUND)
+
+# DRM
+INCLUDE(CMake/FindDRM.cmake)
+IF(DRM_FOUND)
+ MESSAGE(STATUS "Looking for DRM - found")
+ELSE(DRM_FOUND)
+ MESSAGE(STATUS "Looking for DRM - not found")
+ENDIF(DRM_FOUND)
+
+# DRM Intel
+INCLUDE(CMake/FindDRMIntel.cmake)
+IF(DRM_INTEL_FOUND)
+ MESSAGE(STATUS "Looking for DRM Intel - found")
+ELSE(DRM_INTEL_FOUND)
+ MESSAGE(STATUS "Looking for DRM Intel - not found")
+ENDIF(DRM_INTEL_FOUND)
+
+# Xext
+INCLUDE(CMake/FindXext.cmake)
+IF(XEXT_FOUND)
+ MESSAGE(STATUS "Looking for Xext - found")
+ELSE(XEXT_FOUND)
+ MESSAGE(STATUS "Looking for Xext - not found")
+ENDIF(XEXT_FOUND)
+
+# Xfixes
+INCLUDE(CMake/FindXfixes.cmake)
+IF(XFIXES_FOUND)
+ MESSAGE(STATUS "Looking for Xfixes - found")
+ELSE(XFIXES_FOUND)
+ MESSAGE(STATUS "Looking for Xfixes - not found")
+ENDIF(XFIXES_FOUND)
+
+# Gen-backend (compiler)
+INCLUDE(CMake/FindGBE.cmake)
+IF(GBE_FOUND)
+ MESSAGE(STATUS "Looking for Gen-Backend - found")
+ELSE(GBE_FOUND)
+ MESSAGE(STATUS "Looking for Gen-Backend - not found")
+ENDIF(GBE_FOUND)
+
+ADD_SUBDIRECTORY(backend)
+ADD_SUBDIRECTORY(src)
+ADD_SUBDIRECTORY(utests)
+
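
The EMULATE_* switches above are ordinary CMake cache options, so the build can
also be configured non-interactively instead of through ccmake. A minimal
sketch (the build directory name and the chosen option are only examples;
leaving every EMULATE_* switch off targets real hardware):

    # from the project root; "build" is an arbitrary out-of-source directory
    mkdir build && cd build
    # pick at most one emulation switch and, optionally, a build type
    cmake -DEMULATE_IVB=true -DCMAKE_BUILD_TYPE=RelWithDebInfo ..
    make
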
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..4362b49
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,502 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard. To achieve this, non-free programs must be
+allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at
+ least three years, to give the same user the materials
+ specified in Subsection 6a, above, for a charge no more
+ than the cost of performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded. In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library. It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the library's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..31fd8a9
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,13 @@
+TOP=.
+SUBDIRS=src src/sim src/intel src/x11
+
+all::
+
+clean::
+ +cd backend && make clean
+ +cd utests && make clean
+
+include $(TOP)/Makefile.shared
+
+all::
+ cd utests && make
\ No newline at end of file
diff --git a/Makefile.defs b/Makefile.defs
new file mode 100644
index 0000000..55c082e
--- /dev/null
+++ b/Makefile.defs
@@ -0,0 +1,45 @@
+HERE=$(shell pwd)
+
+GEN_BACKEND=$(HERE)/$(TOP)/backend
+
+LIB_BACKEND=$(GEN_BACKEND)/libgbe.so
+INC_BACKEND=$(GEN_BACKEND)/src/backend
+
+DRM_LIBS=$(shell pkg-config libdrm_intel --libs)
+DRM_CFLAGS=$(shell pkg-config libdrm_intel --cflags)
+
+XEXT_LIBS=$(shell pkg-config x11 xfixes xext --libs)
+XEXT_CFLAGS=$(shell pkg-config x11 xfixes xext --cflags)
+
+LLVM_LIBS=$(shell llvm-config --libs)
+LLVM_CFLAGS=$(shell llvm-config --cflags)
+LLVM_CXXFLAGS=$(shell llvm-config --cxxflags | sed 's/\-pedantic//g')
+
+LOCAL_CFLAGS=-Wall -funroll-loops -Wstrict-aliasing -fstrict-aliasing \
+ -msse2 -msse3 -mssse3 -msse4.1 -ffast-math -fPIC -Wall \
+ -DGBE_DEBUG=1 -I$(TOP)/src -I$(TOP)/include \
+ -I$(INC_BACKEND) $(DRM_CFLAGS) $(XEXT_CFLAGS)
+
+LOCAL_LIBS=$(DRM_LIBS) $(XEXT_LIBS)
+
+LOCAL_CXXFLAGS=$(LOCAL_CFLAGS) -std=c++0x -fno-rtti -Wno-invalid-offsetof
+
+CXXFLAGS=$(LOCAL_CXXFLAGS) $(LLVM_CXXFLAGS) $(DIR_CXXFLAGS)
+
+CFLAGS=$(LLVM_CFLAGS) $(LOCAL_CFLAGS) $(DIR_CFLAGS) -std=c99
+
+INC=$(shell for i in $(SUBDIRS); do ls $$i/*.h* 2>/dev/null; done)
+
+C_SRC=$(shell for i in $(SUBDIRS); do ls $$i/*.c 2>/dev/null; done)
+CPP_SRC=$(shell for i in $(SUBDIRS); do ls $$i/*.cpp 2>/dev/null; done)
+
+OBJ=$(C_SRC:.c=.o) $(CPP_SRC:.cpp=.o)
+
+LIBBASE=libcl.so
+MAJOR=0
+MINOR=0
+REVISION=1
+
+VERSION=$(MAJOR).$(MINOR).$(REVISION)
+LIBMAJOR=$(LIBBASE).$(MAJOR)
+LIB=$(LIBMAJOR).$(MINOR).$(REVISION)
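
Makefile.defs resolves every external dependency at make time through
$(shell ...) calls to pkg-config and llvm-config. A quick way to check by hand
that those probes will succeed on a given machine (the same commands the file
runs, merely combined onto fewer lines):

    # libdrm_intel and the X components queried above
    pkg-config libdrm_intel --cflags --libs
    pkg-config x11 xfixes xext --cflags --libs
    # LLVM flags and libraries used for the compiler side
    llvm-config --cflags --libs
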
diff --git a/Makefile.lib b/Makefile.lib
new file mode 100644
index 0000000..58bd153
--- /dev/null
+++ b/Makefile.lib
@@ -0,0 +1,24 @@
+TOP=.
+SUBDIRS=src src/sim src/intel src/x11
+include Makefile.defs
+
+LIBS=-Wl,--no-undefined $(LIB_BACKEND) $(LOCAL_LIBS) -ldl -lpthread
+
+all: $(LIB) $(LIBMAJOR) $(LIBBASE)
+
+$(LIB): $(OBJ) $(LIB_BACKEND)
+ $(CXX) $(CXXFLAGS) -shared -o $@ $(OBJ) $(LIBS)
+
+$(LIBMAJOR): $(LIB)
+ rm -f $@
+ ln -s $(LIB) $@
+
+$(LIBBASE): $(LIBMAJOR)
+ rm -f $@
+ ln -s $(LIBMAJOR) $@
+
+clean:
+ rm -f $(LIB) $(LIBMAJOR) $(LIBBASE)
+
+$(LIB_BACKEND):
+ +cd backend && make all
diff --git a/Makefile.shared b/Makefile.shared
new file mode 100644
index 0000000..882b079
--- /dev/null
+++ b/Makefile.shared
@@ -0,0 +1,15 @@
+include $(TOP)/Makefile.defs
+
+all:: $(LIB)
+
+$(OBJ): $(INC)
+
+clean::
+ rm -f $(OBJ)
+ +cd $(TOP) && make -f Makefile.lib clean
+
+$(LIB): $(OBJ)
+ +cd $(TOP) && make -f Makefile.lib
+
+list-obj:
+ @echo $(OBJ)
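
Taken together, Makefile, Makefile.defs, Makefile.lib and Makefile.shared
implement the plain-Makefile build path mentioned in README.md below: each
directory compiles its objects, then Makefile.lib links them together with
libgbe.so from backend/ into the versioned library. A rough sketch of the
resulting targets (library names derived from the MAJOR/MINOR/REVISION values
in Makefile.defs):

    # from the project root
    make             # builds backend/libgbe.so, then libcl.so.0.0.1 plus the
                     # libcl.so.0 and libcl.so symlinks, and finally the utests
    make list-obj    # prints the object files the library build will use
    make clean       # also recurses into backend/ and utests/
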
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c03fbfd
--- /dev/null
+++ b/README.md
@@ -0,0 +1,133 @@
+Beignet
+=======
+
+Beignet is an open source implementation of the OpenCL specification - a generic
+compute oriented API. This code base contains the code to run OpenCL programs on
+Intel GPUs: it defines and implements the OpenCL host functions required to
+initialize the device, create the command queues, the kernels and the programs,
+and run them on the GPU. The code base also contains the compiler part of the
+stack, which is included in `backend/`. For more specific information about the
+compiler, please refer to `backend/README.md`.
+
+How to build
+------------
+
+There are two ways to build Beignet.
+
+The first one uses a simple Makefile. Just type `make` and the project will
+build if everything is properly installed.
+
+The project also uses CMake with three profiles:
+
+1. Debug (-g)
+2. RelWithDebInfo (-g with optimizations)
+3. Release (only optimizations)
+
+Basically, from the root directory of the project
+
+`> mkdir build && cd build`
+
+`> ccmake ../ # to configure`
+
+Choose whatever you want for the build.
+
+Then press 'c' to configure and 'g' to generate the build files.
+
+`> make`
+
+The project depends on several external libraries:
+
+- Several X components (XLib, Xfixes, Xext)
+- libdrm libraries (libdrm and libdrm\_intel)
+- Various LLVM components
+
+CMake will check the dependencies and will complain if it does not find them.
+
+Once built, the run-time produces a shared object, libcl.so, which directly
+implements the OpenCL API. A set of tests is also produced; they may be found
+in `utests/`.
+
+Note that the compiler depends on LLVM (Low-Level Virtual Machine project).
+Right now, the code has only been compiled with LLVM 3.0 or newer. It will not
+compile with anything older.
+
+[http://llvm.org/releases/](http://llvm.org/releases/)
+
+LLVM 3.0 and 3.1 are supported. LLVM 3.2 is partially supported right now. More
+work is needed to make it fully work.
+
+Also note that the code was compiled on GCC 4.6 and GCC 4.7. Since the code uses
+really recent C++11 features, you may expect problems with older compilers. Last
+time I tried, the code broke ICC 12 and Clang with internal compiler errors
+while compiling anonymous nested lambda functions.
+
+How to run
+----------
+
+Apart from the OpenCL library itself, which can be used by any OpenCL
+application, this code also produces various tests to check the consistency of
+the compiler and the run-time. This small test framework uses a simple C++
+registration system to register all the unit tests.
+
+You need to set the variable `OCL_KERNEL_PATH` to locate the OCL kernels. They
+are with the run-time in `./kernels`.
+
+Then in `utests/`:
+
+`> ./utest_run`
+
+will run all the unit tests one after the other
+
+`> ./utest_run some_unit_test0 some_unit_test1`
+
+will run only the `some_unit_test0` and `some_unit_test1` tests
+
+Supported Hardware
+------------------
+
+As an important remark, the code was only tested on IVB GT2 with a rather
+minimal Linux distribution (ArchLinux) and a very small desktop (dwm). If you
+use something more sophisticated, such as compiz or a similar compositor, you
+may expect serious problems and GPU hangs.
+
+Only IVB is supported right now. Actually, the code was only run on IVB GT2. You
+may expect some issues with IVB GT1.
+
+TODO
+----
+
+The run-time is far from being complete. Most of the pieces have been put
+together to test and develop the OpenCL compiler. A partial list of things to
+do:
+
+- Support for samplers / textures. This should be rather easy since the
+  low-level parts of the code already support them
+
+- Support for events
+
+- Check that NDRangeKernels can be pushed into _different_ queues from several
+ threads
+
+- Support for Enqueue\*Buffer. I added a straightforward extension to map /
+  unmap buffers. This extension, `clIntelMapBuffer`, maps directly to
+  `dri_bo_map`, which is really convenient
+
+- Full support for images. Today, the code just tiles everything *manually*,
+  which is really bad. I think the best solution to copy and create images is to
+  use the GPU and typed writes (scatter to textures) or samplers. We would
+  however need the vmap extension proposed by Chris Wilson to be able to map
+  user pointers while doing the copies and the conversions.
+
+- No state tracking at all. One batch buffer is created at each "draw call"
+ (i.e. for each NDRangeKernel). This is really inefficient since some
+ expensive pipe controls are issued for each batch buffer
+
+- Valgrind reports some leaks in libdrm. It sounds like a false positive but it
+ has to be checked. The same goes for LLVM: there is one leak there to check
+
+More generally, everything in the run-time that triggers the "FATAL" macro means
+that something that must be supported is not implemented properly (either it
+does not comply with the standard or it is just missing).
+
+Ben Segovia (<benjamin.segovia at intel.com>)
+
diff --git a/backend/.gitignore b/backend/.gitignore
new file mode 100644
index 0000000..4d59032
--- /dev/null
+++ b/backend/.gitignore
@@ -0,0 +1,3 @@
+*.o
+*.so*
+MakeOut
diff --git a/backend/CMakeLists.txt b/backend/CMakeLists.txt
new file mode 100644
index 0000000..c36f970
--- /dev/null
+++ b/backend/CMakeLists.txt
@@ -0,0 +1,97 @@
+project (GBE)
+
+cmake_minimum_required (VERSION 2.6.0)
+
+set (GBE_CMAKE_DIR "${GBE_SOURCE_DIR}/cmake")
+set (CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${GBE_CMAKE_DIR}")
+
+##############################################################
+# Compilation directives
+##############################################################
+
+set (GBE_DEBUG_MEMORY false CACHE bool "Activate the memory debugger")
+set (GBE_USE_BLOB false CACHE bool "Compile everything from one big file")
+
+##############################################################
+# Compiler
+##############################################################
+if (UNIX)
+ set (COMPILER "GCC" CACHE INT "Compiler to choose on Linux (GCC,ICC,CLANG)")
+endif (UNIX)
+
+# Force Release with debug info
+if (NOT CMAKE_BUILD_TYPE)
+ set (CMAKE_BUILD_TYPE RelWithDebInfo)
+endif (NOT CMAKE_BUILD_TYPE)
+set (CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "assure config" FORCE)
+message(STATUS "Building mode: " ${CMAKE_BUILD_TYPE})
+
+if (GBE_DEBUG_MEMORY)
+ set (GBE_DEBUG_MEMORY_FLAG "-DGBE_DEBUG_MEMORY=1")
+else (GBE_DEBUG_MEMORY)
+ set (GBE_DEBUG_MEMORY_FLAG "-DGBE_DEBUG_MEMORY=0")
+endif (GBE_DEBUG_MEMORY)
+
+# Hide all symbols and allows the symbols declared as visible to be exported
+set (CMAKE_C_CXX_FLAGS "-fvisibility=hidden")
+
+if (COMPILER STREQUAL "GCC")
+ set (CMAKE_C_CXX_FLAGS "${CMAKE_C_CXX_FLAGS} -funroll-loops -Wstrict-aliasing=2 -fstrict-aliasing -msse2 -msse3 -mssse3 -msse4.1 -ffast-math -fPIC -Wall")
+ set (CMAKE_CXX_FLAGS "${CMAKE_C_CXX_FLAGS} -fno-exceptions -Wno-invalid-offsetof -fno-rtti -std=c++0x")
+ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_DEBUG_MEMORY_FLAG}")
+ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}")
+ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-E")
+ set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--no-undefined")
+ set (CMAKE_CXX_FLAGS_DEBUG "-g -DGBE_DEBUG=1")
+ set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
+ set (CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG -DGBE_DEBUG=0")
+ set (CMAKE_CXX_FLAGS_RELEASE "-O2 -DNDEBUG -DGBE_DEBUG=0")
+ set (CMAKE_C_FLAGS "${CMAKE_C_CXX_FLAGS}")
+ set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GBE_DEBUG_MEMORY_FLAG}")
+ set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}")
+ set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wl,-E")
+ set (CMAKE_C_FLAGS_DEBUG "-g -DGBE_DEBUG=1")
+ set (CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
+ set (CMAKE_C_FLAGS_MINSIZEREL "-Os -DNDEBUG -DGBE_DEBUG=0")
+ set (CMAKE_C_FLAGS_RELEASE "-O2 -DNDEBUG -DGBE_DEBUG=0")
+elseif (COMPILER STREQUAL "CLANG")
+ set (CMAKE_C_COMPILER "clang")
+ set (CMAKE_C_FLAGS "-Wall -std=c99")
+ set (CMAKE_C_FLAGS_DEBUG "-g -DGBE_DEBUG=1")
+ set (CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
+ set (CMAKE_C_FLAGS_MINSIZEREL "-Os -DNDEBUG -DGBE_DEBUG=0")
+ set (CMAKE_C_FLAGS_RELEASE "-O2 -DNDEBUG -DGBE_DEBUG=0")
+ set (CMAKE_CXX_COMPILER "clang++")
+ set (CMAKE_CXX_FLAGS "-fstrict-aliasing -msse2 -ffast-math -fPIC -Wall -Wno-format-security -Wno-invalid-offsetof -std=c++0x")
+ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_DEBUG_MEMORY_FLAG}")
+ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}")
+ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${VISIBILITY_FLAG}")
+ set (CMAKE_CXX_FLAGS_DEBUG "-g -DGBE_DEBUG=1")
+ set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
+ set (CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG -DGBE_DEBUG=0")
+ set (CMAKE_CXX_FLAGS_RELEASE "-O2 -DNDEBUG -DGBE_DEBUG=0")
+ set (CMAKE_AR "/usr/bin/llvm-ar")
+ set (CMAKE_LINKER "/usr/bin/llvm-ld")
+ set (CMAKE_NM "/usr/bin/llvm-nm")
+ set (CMAKE_OBJDUMP "/usr/bin/llvm-objdump")
+ set (CMAKE_RANLIB "ranlib")
+elseif (COMPILER STREQUAL "ICC")
+ set (CMAKE_CXX_COMPILER "icpc")
+ set (CMAKE_C_COMPILER "icc")
+ set (CMAKE_CXX_FLAGS "-std=c++0x -wd2928 -Wall -fPIC -fstrict-aliasing -fp-model fast -xSSE2")
+ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_DEBUG_MEMORY_FLAG}")
+ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}")
+ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${VISIBILITY_FLAG} -Wl,-E")
+ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_DEBUG_MODE_FLAG}")
+ set (CMAKE_CXX_FLAGS_DEBUG "-g -O0 -DGBE_DEBUG=1")
+ set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g -O2 -DGBE_DEBUG=1")
+ set (CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG -O2 -DGBE_DEBUG=0")
+ set (CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DGBE_DEBUG=0")
+ set (CMAKE_EXE_LINKER_FLAGS "")
+endif ()
+
+##############################################################
+# Project source code
+##############################################################
+add_subdirectory (src)
+
diff --git a/backend/Makefile b/backend/Makefile
new file mode 100644
index 0000000..bf4aca9
--- /dev/null
+++ b/backend/Makefile
@@ -0,0 +1,4 @@
+TOP=.
+SUBDIRS=src/backend src/backend/gen src/backend/sim src/ir src/llvm src/sys
+
+include $(TOP)/Makefile.shared
diff --git a/backend/Makefile.defs b/backend/Makefile.defs
new file mode 100644
index 0000000..ac77901
--- /dev/null
+++ b/backend/Makefile.defs
@@ -0,0 +1,25 @@
+LOCAL_CFLAGS=-funroll-loops -Wstrict-aliasing=2 -fstrict-aliasing -fno-exceptions\
+ -msse2 -msse3 -mssse3 -msse4.1 -ffast-math -fPIC -Wall \
+ -DGBE_DEBUG=1 -I$(TOP)/src
+
+LOCAL_CXXFLAGS=$(LOCAL_CFLAGS) -std=c++0x -fno-rtti -Wno-invalid-offsetof
+
+CXXFLAGS=$(LOCAL_CXXFLAGS) $(shell llvm-config --cxxflags | sed 's/\-pedantic//g')
+
+CFLAGS=$(LOCAL_CFLAGS) $(shell llvm-config --cflags) -std=gnu9x
+
+INC=$(shell for i in $(SUBDIRS); do ls $$i/*.h* 2>/dev/null; done)
+
+C_SRC=$(shell for i in $(SUBDIRS); do ls $$i/*.c 2>/dev/null; done)
+CPP_SRC=$(shell for i in $(SUBDIRS); do ls $$i/*.cpp 2>/dev/null; done)
+
+OBJ=$(C_SRC:.c=.o) $(CPP_SRC:.cpp=.o)
+
+LIBBASE=libgbe.so
+MAJOR=0
+MINOR=0
+REVISION=1
+
+VERSION=$(MAJOR).$(MINOR).$(REVISION)
+LIBMAJOR=$(LIBBASE).$(MAJOR)
+LIB=$(LIBMAJOR).$(MINOR).$(REVISION)
diff --git a/backend/Makefile.lib b/backend/Makefile.lib
new file mode 100644
index 0000000..468b0c3
--- /dev/null
+++ b/backend/Makefile.lib
@@ -0,0 +1,23 @@
+TOP=.
+SUBDIRS=src/backend src/backend/gen src/backend/sim src/ir src/llvm src/sys src
+include Makefile.defs
+
+LIBS=-Wl,--no-undefined -L$(shell llvm-config --libdir) $(shell llvm-config --libs) -ldl -lpthread
+
+VERSION=0.0.1
+
+all: $(LIB) $(LIBMAJOR) $(LIBBASE)
+
+$(LIB): $(OBJ)
+ $(CXX) $(CXXFLAGS) -shared -o $@ $(OBJ) $(LIBS)
+
+$(LIBMAJOR): $(LIB)
+ rm -f $@
+ ln -s $(LIB) $@
+
+$(LIBBASE): $(LIBMAJOR)
+ rm -f $@
+ ln -s $(LIBMAJOR) $@
+
+clean:
+ rm -f $(LIB) $(LIBMAJOR) $(LIBBASE)
diff --git a/backend/Makefile.shared b/backend/Makefile.shared
new file mode 100644
index 0000000..4b048d5
--- /dev/null
+++ b/backend/Makefile.shared
@@ -0,0 +1,15 @@
+include $(TOP)/Makefile.defs
+
+all: $(LIB)
+
+$(OBJ): $(INC)
+
+clean:
+ rm -f $(OBJ)
+ +cd $(TOP) && make -f Makefile.lib clean
+
+$(LIB): $(OBJ)
+ +cd $(TOP) && make -f Makefile.lib
+
+list-obj:
+ @echo $(OBJ)
diff --git a/backend/README.md b/backend/README.md
new file mode 100644
index 0000000..5617fd2
--- /dev/null
+++ b/backend/README.md
@@ -0,0 +1,57 @@
+Beignet Compiler
+================
+
+This code base contains the compiler part of the Beignet OpenCL stack. The
+compiler is responsible for taking an OpenCL language string and compiling it
+into a binary that can be executed on Intel integrated GPUs.
+
+Limitations
+-----------
+
+Today, the compiler is far from complete. See [here](doc/TODO.html) for an
+(incomplete) list of things to do.
+
+Interface with the run-time
+---------------------------
+
+Even though the compiler makes very liberal use of C++ (templates, variadic
+templates, macros), we tried hard to make a very simple interface with the
+run-time. The interface is therefore a pure C99 interface and it is defined
+in `src/backend/program.h`.
+
+The goal is to hide the complexity of the inner data structures and to enable
+simple run-time implementation using straightforward C99.
+
+Note that the data structures are fully opaque: this allows us to use either
+the C++ simulator or the real Gen program in a relatively non-intrusive way.
+
+Various environment variables
+-----------------------------
+
+Environment variables are used all over the code. Most important ones are:
+
+- `OCL_SIMD_WIDTH` `(8 or 16)`. Change the number of lanes per hardware thread
+
+- `OCL_OUTPUT_GEN_IR` `(0 or 1)`. Output Gen IR (scalar intermediate
+ representation) code
+
+- `OCL_OUTPUT_LLVM` `(0 or 1)`. Output LLVM code after the lowering passes
+
+- `OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS` `(0 or 1)`. Output LLVM code before the
+ lowering passes
+
+- `OCL_OUTPUT_ASM` `(0 or 1)`. Output Gen ISA
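+
+For instance, to dump the Gen IR produced while running any OpenCL application
+that uses this stack (the application name is just a placeholder):
+
+`> OCL_OUTPUT_GEN_IR=1 ./my_opencl_app`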
+
+Implementation details
+----------------------
+
+Several key decisions may use the hardware in an unusual way. See the following
+documents for the technical details about the compiler implementation:
+
+- [Flat address space](doc/flat\_address\_space.html)
+- [Unstructured branches](doc/unstructured\_branches.html)
+- [Scalar intermediate representation](doc/gen\_ir.html)
+- [Clean backend implementation](doc/compiler_backend.html)
+
+Ben Segovia (<benjamin.segovia at intel.com>)
+
diff --git a/backend/doc/TODO.md b/backend/doc/TODO.md
new file mode 100644
index 0000000..9f5a934
--- /dev/null
+++ b/backend/doc/TODO.md
@@ -0,0 +1,120 @@
+TODO
+====
+
+The compiler is far from complete. Even if the skeleton is now done and should
+be solid, there are a _lot_ of things to do, from trivial to complex.
+
+OpenCL standard library
+-----------------------
+
+Today we define the OpenCL API in the header file `src/ocl_stdlib.h`. This file
+is far from being complete.
+
+By the way, one question remains: do we want to implement the high-precision
+functions as _inline_ functions or as external functions to call? Indeed,
+inlining all functions may lead to severe code bloat while calling functions
+will require implementing a proper ABI. We certainly want to do both actually.
+
+LLVM front-end
+--------------
+
+The code is defined in `src/llvm`. We used the PTX ABI and the OpenCL profile
+to compile the code. Therefore, a good part of the job is already done. However,
+many things must be implemented:
+
+- Lowering down of various intrinsics like `llvm.memcpy`
+
+- Implementation of most of the OpenCL built-ins (`native_cos`, `native_sin`,
+ `mad`, atomic operations, barriers...)
+
+- Lowering down of int16 / int8 / float16 / char16 / char8 / char4 loads and
+ stores into the supported loads and stores
+
+- Support for constant buffers declared in the OpenCL source file
+
+- Support for local declaration of local array (the OpenCL profile will properly
+ declare them as global arrays)
+
+- Support for doubles
+
+- Support for images. This will require ensuring that images are only directly
+ accessed
+
+- Better resolving of the PHI functions. Today, we always generate MOV
+ instructions at the end of each basic block. They can be easily optimized.
+
+Gen IR
+------
+
+The code is defined in `src/ir`. Main things to do are:
+
+- Bringing support for doubles
+
+- Adding proper support for SAMPLE and TYPED_WRITE instructions
+
+- Adding support for BARRIER instructions
+
+- Adding support for all the math instructions (native_cos, native_sin...)
+
+- Finishing the handling of function arguments (see the [IR
+ description](gen_ir.html) for more details)
+
+- Adding support for constant data per unit
+
+- Adding support for linking IR units together. OpenCL indeed allows creating
+ programs from several sources
+
+- Uniform analysis. This is a major performance improvement. A "uniform" value
+ is basically a value that, regardless of the control flow, is identical in all
+ the activated lanes. Trivial examples are immediate values and function
+ arguments. Also, operations on uniform values produce uniform values, and so
+ on (a rough sketch of such a propagation is given after this list)
+
+- Merging of independent uniform loads (and samples). This is a major
+ performance improvement once the uniform analysis is done. Basically, several
+ uniform loads may be collapsed into one load if no write happens in-between.
+ This will obviously impact both instruction selection and the register
+ allocation.
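+
+As an illustration only, here is a minimal C++ sketch of the kind of forward
+propagation such a uniform analysis could perform. The instruction and register
+types are hypothetical placeholders (not the actual Gen IR classes) and the
+sketch ignores control-flow divergence for brevity:
+
+<pre>
+#include <cstdint>
+#include <set>
+#include <vector>
+
+// Hypothetical, simplified view of an IR instruction: one destination,
+// several sources, plus a flag telling whether the opcode itself is lane
+// dependent (e.g. anything derived from the lane / work item ID).
+struct Insn {
+  uint32_t dst;
+  std::vector<uint32_t> src;
+  bool laneDependent;
+};
+
+// Seeds (immediates, scalar kernel arguments) are uniform. An instruction
+// produces a uniform value when all its sources are uniform and the opcode
+// is not lane dependent. Iterate to a fixed point.
+std::set<uint32_t> uniformRegisters(const std::vector<Insn> &insns,
+                                    const std::set<uint32_t> &seeds)
+{
+  std::set<uint32_t> uniform = seeds;
+  bool changed = true;
+  while (changed) {
+    changed = false;
+    for (const Insn &insn : insns) {
+      if (insn.laneDependent || uniform.count(insn.dst)) continue;
+      bool allUniform = true;
+      for (uint32_t s : insn.src)
+        if (!uniform.count(s)) { allUniform = false; break; }
+      if (allUniform) {
+        uniform.insert(insn.dst);
+        changed = true;
+      }
+    }
+  }
+  return uniform;
+}
+</pre>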
+
+Backend
+-------
+
+The code is defined in `src/backend`. Main things to do are:
+
+- Bringing backend support for the missing instructions described above
+ (native_sin, native_cos, barriers, samples...)
+
+- Implementing support for doubles
+
+- Implementing register spilling (see the [compiler backend
+ description](./compiler_backend.html) for more details)
+
+- Implementing proper instruction selection. A "simple" tree matching algorithm
+ should provide good results for Gen
+
+- Implementing the instruction scheduling pass
+
+General plumbing
+----------------
+
+I tried to keep the code clean, well, as far as C++ can be really clean. There
+are some header cleaning steps required though, in particular in the backend
+code.
+
+The context used in the IR code generation (see `src/ir/context.*pp`) should be
+split up and cleaned up too.
+
+I also purely and simply copied and pasted the Gen ISA disassembler from Mesa.
+This leads to code duplication. Also some messages used by OpenCL (untyped reads
+and writes) are not properly decoded yet.
+
+There are also some quick and dirty hacks, like the use of the `system(...)`
+function call. This should be cleanly replaced by `popen` and the like. I also
+directly call the LLVM compiler executable instead of using the Clang library.
+All of this should be improved and cleaned up. Track the "XXX" comments in the
+code.
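+
+For reference, here is a hedged sketch of what replacing such a `system` call
+could look like with POSIX `popen` (the helper name is made up and the command
+line is a placeholder, not the one actually used by the compiler):
+
+<pre>
+#include <cstdio>
+#include <string>
+
+// Run an external command and capture its standard output through a pipe,
+// instead of calling system() and reading back a temporary file. Returns an
+// empty string if the command could not be started.
+static std::string runAndCapture(const std::string &cmd)
+{
+  FILE *pipe = popen(cmd.c_str(), "r");
+  if (pipe == NULL) return "";
+  std::string output;
+  char buffer[4096];
+  size_t n;
+  while ((n = fread(buffer, 1, sizeof(buffer), pipe)) > 0)
+    output.append(buffer, n);
+  pclose(pipe);
+  return output;
+}
+
+// Example (placeholder command line):
+//   const std::string out = runAndCapture("clang -S -emit-llvm kernel.cl -o -");
+</pre>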
+
+Parts of the code leak memory when exceptions are used. There are some pointers
+to track and replace with std::unique_ptr. Note that we also added a custom
+memory debugger that nicely complements Valgrind (i.e. it is fast).
+
diff --git a/backend/doc/compiler_backend.md b/backend/doc/compiler_backend.md
new file mode 100644
index 0000000..eb5faa7
--- /dev/null
+++ b/backend/doc/compiler_backend.md
@@ -0,0 +1,112 @@
+Compiler Back End
+=================
+
+Well, the complete code base is somehow a compiler backend for LLVM. Here, we
+really speak about the final code generation passes that you may find in
+`src/backend`.
+
+As explained in [the scalar IR presentation](./gen_ir.html), we bet on a very
+simple scalar IR to make it easy to parse and modify. The idea is to handle the
+problems that are unrelated to the IR (because they are very Gen specific) where
+we can, i.e. when the final code is generated.
+
+The code generation in the compiler backend is classically divided into four
+steps:
+
+- Instruction selection (defined in `src/backend/gen_insn_selection.*pp`). We
+ expose an interface for the instruction selection engine. We implemented a
+ very simple selection (called `SimpleSelection`) that does a quick and dirty
+ one-to-many instruction generation.
+
+- Register allocation (defined in `src/backend/gen_reg_allocation.*pp`). The
+ code implements a linear scan allocator on the code selected in the previous
+ pass. See below for more details about register vector allocations.
+
+- Instruction scheduling. This one is not done yet. We just output the same
+ instruction order as the program order. Note that we plan to implement an
+ adaptive scheduling between register allocation and instruction selection (to
+ avoid spilling as much as possible)
+
+- Instruction encoding. This is the final step that encodes the program into Gen
+ ISA.
+
+Instruction selection
+---------------------
+
+Usually, the instruction selection consists in mapping `p` instructions to `q`
+ISA instructions under a cost driven model. Each basic block is therefore _tiled_
+into some numbers of groups of ISA instructions such that the final cost is
+minimized.
+
+The literature is particularly dense on the subject. Today, compilers usually
+use either tree matching methods or selection DAG techniques (as LLVM backends
+do).
+
+The instruction selection is still a work in progress in our compiler and we
+only implement the most stupid (and inefficient) technique: we simply generate
+as many instructions as we need for each _individual_ IR instruction. Since we
+do not support immediate sources, this in particular leads to really ugly
+looking code such as `mov (16) r2:f 1.f`.
+
+Other than that, the instruction selection is really a book-keeping structure.
+We basically output `SelectionInstruction` objects which map 1-to-1 to the Gen
+ISA encoding functions defined in `src/backend/gen_encoder.*pp`.
+
+However, the `SelectionInstruction` objects still use unallocated virtual
+registers and do *not* use vectors but simply tuples of virtual registers.
+
+Register allocation
+-------------------
+
+The register allocation actually consists in two steps:
+
+1. Handling the vector for all the instructions that require them
+
+2. Performing the register allocation itself
+
+Step 1 consists in scanning all the vectors required by sends. Obviously, the
+same register may be used in different vectors and that may lead to
+interferences. We simply sort the vectors from the largest to the smallest and
+allocate them in that order. As an optimization we also identify sub-vectors,
+i.e. vectors included in larger ones, and do not allocate them.
+
+The code may be largely improved in particular if we take into account liveness
+interferences as well. Basically, a register may be part of several vectors if the
+registers that are not in both vectors at the same location are not alive at the
+same time.
+
+This is still a work in progress. Code is right now handled by method
+`GenRegAllocator::allocateVector`.
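+
+To make the idea concrete, here is a small sketch of the ordering described
+above (this is *not* the actual `GenRegAllocator::allocateVector` code; the
+types and the `allocateContiguous` callback are simplified placeholders): sort
+the vectors from the largest to the smallest, skip the ones fully contained in
+an already allocated vector, and reserve contiguous registers for the rest.
+
+<pre>
+#include <algorithm>
+#include <cstdint>
+#include <vector>
+
+// A "vector" here is just an ordered tuple of virtual registers that a send
+// instruction needs to see in contiguous physical registers.
+typedef std::vector<uint32_t> RegVector;
+
+// Return true if 'small' appears as a contiguous sub-sequence of 'big', in
+// which case it does not need its own allocation.
+static bool isSubVector(const RegVector &small, const RegVector &big) {
+  if (small.size() > big.size()) return false;
+  return std::search(big.begin(), big.end(),
+                     small.begin(), small.end()) != big.end();
+}
+
+// Process the vectors from the largest to the smallest and only allocate the
+// ones that are not included in a previously allocated vector.
+template <typename AllocFn>
+void allocateVectors(std::vector<RegVector> vectors, AllocFn allocateContiguous) {
+  std::sort(vectors.begin(), vectors.end(),
+            [](const RegVector &a, const RegVector &b) {
+              return a.size() > b.size();
+            });
+  std::vector<RegVector> allocated;
+  for (const RegVector &v : vectors) {
+    bool skip = false;
+    for (const RegVector &big : allocated)
+      if (isSubVector(v, big)) { skip = true; break; }
+    if (!skip) {
+      allocateContiguous(v); // reserve v.size() contiguous registers
+      allocated.push_back(v);
+    }
+  }
+}
+</pre>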
+
+Step 2 performs the register allocation i.e. it associates each virtual register
+to one (or several) physical registers. The first thing is that the Gen register
+file is very flexible i.e. it can (almost) be freely partitioned. To handle this
+peculiarity, we simply implemented a free list based generic memory allocator as
+done with `RegisterFilePartitioner` in `src/backend/context.cpp`.
+
+We then simply implemented a linear scan allocator (see
+`gen_reg_allocation.cpp`). The spilling is not implemented and is still a work
+in progress. The thing is that spilling must be specifically handled with Gen.
+Indeed:
+
+1. Bad point. Spilling is expensive and requires assembling messages for it
+
+2. Good point. Gen is able to spill up to 256 _contiguous_ bytes in one message.
+This must be used for high performance spilling and this may require reordering
+the registers to spill properly.
+
+Instruction scheduling
+----------------------
+
+Intra-basic block instruction scheduling is relatively simple. It is not
+implemented yet.
+
+Instruction encoding
+--------------------
+
+This is mostly done in `src/backend/gen_context.cpp` and
+`src/backend/gen_encoder.*pp`. This is mostly glue code and it is pretty
+straightforward. We just forward the selection code using the physically
+allocated registers. There is nothing special here. Just boilerplate.
+
+[Up](../README.html)
+
diff --git a/backend/doc/flat_address_space.md b/backend/doc/flat_address_space.md
new file mode 100644
index 0000000..5465157
--- /dev/null
+++ b/backend/doc/flat_address_space.md
@@ -0,0 +1,101 @@
+Flat Address Space
+==================
+
+Segmented address space...
+--------------------------
+
+The first challenge with OpenCL is its very liberal use of pointers. The memory
+is segmented into several address spaces:
+
+- private. This is the memory for each work item
+
+- global. These are buffers in memory shared by all work items and work groups
+
+- constant. These are constant buffers in memory shared by all work items and
+work groups as well
+
+- local. This is the memory shared by all work items in the *same* work group
+
+... But with no restriction inside each address space
+-----------------------------------------------------
+
+The challenge is that there is no restriction in OpenCL inside each address
+space i.e. the full C semantic applies in particular regarding pointer
+arithmetic.
+
+Therefore the following code is valid:
+
+<code>
+\_\_kernel void example(\_\_global int *dst, \_\_global int *src0, \_\_global int *src1)<br/>
+{<br/>
+ \_\_global int *from;<br/>
+ if (get\_global\_id(0) % 2)<br/>
+ from = src0;<br/>
+ else<br/>
+ from = src1;<br/>
+ dst[get\_global\_id(0)] = from[get\_global\_id(0)];<br/>
+}
+</code>
+
+As one may see, the load done in the last line actually mixes pointers from both
+sources src0 and src1. This typically makes the use of binding table indices
+pretty hard. If we use binding table 0 for dst, 1 for src0 and 2 for src1 (for
+example), we are not able to express the load in the last line with one send
+only.
+
+No stateless support in the required messages
+---------------------------------------------
+
+Furthermore, on IVB, we are going to use four types of messages to implement the
+loads and the stores:
+
+- Byte scattered reads. They are used to read bytes/shorts/integers that are not
+aligned on 4 bytes. This is a gather message i.e. the user provides up to 16
+addresses
+
+- Byte scattered writes. They are used to write bytes/shorts/integers that are not
+aligned on 4 bytes. This is a scatter message i.e. the user provides up to 16
+addresses
+
+- Untyped reads. They allow reading from 1 to 4 double words (i.e. 4 bytes each)
+per lane. This is also a gather message i.e. up to 16 addresses are provided per
+message.
+
+- Untyped writes. They are the counterpart of the untyped reads
+
+The problem is that IVB does not support stateless accesses for these messages,
+so surfaces are required. Secondly, stateless messages are not that interesting
+since all of them require a header, which is still slow to assemble.
+
+Implemented solution
+--------------------
+
+The solution is actually quite simple. Even with no stateless support, it is
+actually possible to simulate it with a surface. As one may see in the run-time
+code in `intel/intel_gpgpu.c`, we simply create a surface:
+
+- 2GB big
+
+- Which starts at offset 0
+
+Surprisingly, this surface can actually map the complete GTT address space which
+is 2GB big. One may look at `flat_address_space` unit test in the run-time code
+that creates and copies buffers in such a way that the complete GTT address
+space is traversed.
+
+This solution brings a pretty simple implementation on the compiler side.
+Basically, there is nothing to do when translating from LLVM to Gen ISA. A
+pointer to `__global` or `__constant` memory is simply a 32-bit offset in that
+surface.
+
+Related problems
+----------------
+
+There is one drawback for this approach. Since we use a 2GB surface that maps
+the complete GTT space, there is no protection at all. Each write can therefore
+potentially modify any buffer including the command buffer, the frame buffer or
+the kernel code. There is *no* protection at all in the hardware to prevent
+that.
+
+[Up](../README.html)
+
diff --git a/backend/doc/gen_ir.md b/backend/doc/gen_ir.md
new file mode 100644
index 0000000..2c5c0d0
--- /dev/null
+++ b/backend/doc/gen_ir.md
@@ -0,0 +1,256 @@
+Scalar Intermediate Representation
+==================================
+
+The IR code is included in `src/ir/` of the compiler code base. The IR as
+designed in this compiler is the fruit of a long reflection I mostly had with
+Thomas Raoux. Note that I usually call it "Gen IR".
+
+Scalar vs vector IR
+-------------------
+
+This is actually the major question: do we need a vector IR or a scalar IR? On
+the LLVM side, we have both. LLVM IR can manipulate vectors and scalars (and
+even generalized values, but we can ignore those for now).
+
+For that reason, the Clang front-end generates both scalar and vector code.
+Typically, a `uint4` variable will output a vector of 4 integers. Arithmetic
+computations will be directly done on vector variables.
+
+On the HW side, the situation is completely different:
+
+- We are going to use the parallel mode (align1) i.e. the struct-of-array mode
+ for the EU. This is a SIMD scalar mode.
+
+- The only source of vectors we are going to have is on the sends instructions
+ (and marginally for some other instructions like the div_rem math instruction)
+
+One may therefore argue that we need vector instructions to handle the sends.
+Send will indeed require both vector destinations and sources. This may be a
+strong argument *for* vectors in the IR. However, the situation is not that
+good.
+
+Indeed, if we look carefully at the send instructions we see that they will
+require vectors that are *not* vectors in LLVM IR. This code for example:
+
+<code>
+__global uint4 *src;<br/>
+uint4 x = src[get\_global\_id(0)];<br/>
+src[get\_global\_id(0)] = x;<br/>
+</code>
+
+The store in the last line will be translated into an untyped write in the Gen
+ISA. Unfortunately, the addresses and the values to write end up in the *same*
+vector. However, LLVM IR will output a store like:
+
+`store(%addr, %value)`
+
+which basically uses one scalar (the address) and one value (the vector to
+write). Therefore even if we handle vectors in the IR, that will not directly
+solve the problem we have at the end for the send instructions.
+
+We therefore decided to go the other direction:
+
+- We have a purely scalar IR
+
+- To replace vectors, we simply use multiple sources and destinations
+
+- Real vectors required by send instructions are handled at the very bottom of
+the stack in the register allocation passes.
+
+This leads to a very simple intermediate representation which is mostly a pure
+scalar RISC machine.
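+
+As a purely illustrative sketch (these are *not* the actual `src/ir` classes),
+an instruction in such a scalar IR only needs an opcode and two small arrays of
+register indices; a `uint4` load then simply becomes one instruction with one
+scalar address source and four scalar destinations:
+
+<pre>
+#include <cstdint>
+#include <vector>
+
+// Hypothetical, simplified scalar IR instruction: no vector operands, just
+// several scalar destination and source registers.
+enum Opcode { OP_LOAD, OP_STORE, OP_ADD /* ... */ };
+
+struct ScalarInsn {
+  Opcode opcode;
+  std::vector<uint32_t> dst; // destination registers
+  std::vector<uint32_t> src; // source registers
+};
+
+// "uint4 x = src[get_global_id(0)];" would roughly become:
+ScalarInsn makeUint4Load(uint32_t addr, uint32_t x0, uint32_t x1,
+                         uint32_t x2, uint32_t x3) {
+  ScalarInsn insn;
+  insn.opcode = OP_LOAD;
+  insn.dst.push_back(x0); insn.dst.push_back(x1);
+  insn.dst.push_back(x2); insn.dst.push_back(x3);
+  insn.src.push_back(addr);
+  return insn;
+}
+</pre>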
+
+Very limited IR
+---------------
+
+The other major question, in particular when you look at similar stacks like
+NVidia PTX, is:
+
+do we need to encode register modifiers (abs, negate...) and immediate
+registers (like in add.f x y 1.0) in the IR?
+
+Contrary to other IRs (PTX and even LLVM, which both support immediates), we
+chose to have a very simple IR, much simpler than the final ISA, and to merge
+back what we need at the instruction selection pass. Since we need instruction
+selection, let us keep the IR simple.
+
+Also, there are a lot of major issues that cannot be covered in the IR and need
+to be specifically handled at the very end of the code generation:
+
+- send vectors (see previous section)
+
+- send headers (value and register allocation) which are also part of the vector
+problem
+
+- SIMD8 mode in SIMD16 code. Some send messages do not support SIMD16 encoding
+and require SIMD8. Typical examples are typed writes i.e. scatters to textures.
+Also, this cannot easily be encoded in a regular scalar IR.
+
+For these reasons, most of the problems directly related to Gen naturally find
+their solutions in either the instruction selection or the register allocator.
+
+This leads to the following strategy:
+
+- Keep the IR very simple and limited
+
+- Use all the analysis tools you need in the IR before the final code generation
+to build any information you need. This is pure "book-keeping".
+
+- Use any previous analysis and finish the job at the very end
+
+This classical approach limits the complexity of the IR while forcing us to
+write the proper tools in the final stages.
+
+Why not using LLVM IR directly?
+-------------------------------
+
+We hesitated a long time between writing a dedicated IR (as we did) and just
+using LLVM IR. Indeed, LLVM comes with a large set of tools that are part of
+"LLVM backends". LLVM provides a lot of tools to perform the instruction
+selection (`SelectionDAG`) and the register allocation. Two things however
+prevent us from choosing this path:
+
+- We only have a limited experience with LLVM and no experience at all with the
+LLVM backends
+
+- LLVM register allocators do not handle at all the peculiarities of Gen:
+
+ * flexible register file. Gen registers are more like memory than registers
+ and can be freely allocated and aliased. LLVM register allocators only
+ support partial aliasing like x86 machines do (rax -> eax -> ax)
+
+ * no proper tools to handle vectors in the register allocator as we need for
+ sends
+
+Since we will need to do some significant work anyway, this leads us to choose a
+more hard-coded path with an in-house IR. Note that this will not prevent us
+from later implementing an LLVM backend "by the book", as Nvidia does today with
+PTX (using an LLVM backend to do the LLVM IR -> PTX conversion).
+
+SSA or no SSA
+-------------
+
+Since we have a purely scalar IR, implementing a SSA transformation on the IR
+may be convenient. However, most of the literature about compiler back-ends uses
+a non-SSA representation of the code. Since the primary goal is to write a
+compiler _back-end_ (instruction selection, register allocation and instruction
+scheduling), we keep the code in non-SSA form, leaving the higher level
+optimizations to LLVM.
+
+Types, registers, instructions, functions and units
+---------------------------------------------------
+
+The IR is organized as follows:
+
+- Types (defined in `src/ir/type.*pp`). These are scalar types only. Since the
+ code is completely lowered down, there is no more reference to structures,
+ pointers or vectors. Everything is scalar values and when "vectors" or
+ "structures" would be needed, we use instead multiple scalar sources or
+ destinations.
+
+- Registers (defined in `src/ir/register.*pp`). They are untyped (since Gen
+ registers are untyped) and we have 65,535 of them per function
+
+- Instructions (defined in `src/ir/instruction.*pp`). They are typed (to
+ distinguish integer and FP adds for example) and possibly support multiple
+ destinations and sources. We also provide a convenient framework to introspect
+ the instruction in a simple (and memory efficient) way
+
+- Functions (defined in `src/ir/function.*pp`). They are basically the
+ counterpart of LLVM functions or OpenCL kernels. Note that function arguments
+ are a problem. We actually use the PTX ABI. Everything smaller than the machine
+ word size (i.e. 32 bits for Gen) is passed by value in a register. Everything
+ else, i.e. anything bigger than that, is passed by pointer with a ByVal
+ attribute. Note that this requires some special treatment in the IR (see below)
+ to make the code faster by replacing function argument loads by "pushed
+ constants". We also define one "register file" per function, i.e. the registers
+ are defined relatively to the function that uses them. Each function is made of
+ basic blocks, i.e. sequences of instructions that are executed linearly.
+
+- Units (defined in `src/ir/unit.*pp`). Units are just a collection of
+ functions and constants (not supported yet).
+
+Function arguments and pushed constants
+---------------------------------------
+
+Gen can push values into the register file i.e. some registers are preset when
+the kernel starts to run. As detailed previously, the PTX ABI is convenient
+since every argument is either one register or one pointer to load from or to
+store to.
+
+However, when a pointer is used for an argument, loads are issued which may be
+avoided by using constant pushes.
+
+Once again, OCL makes the task a bit harder than expected. Indeed, the C
+semantics apply to function arguments as well.
+
+Look at these three examples:
+
+### Case 1. Direct loads -> constant push can be used
+
+<code>
+struct foo { int x; int y; }; </br>
+\_\_kernel void case1(\_\_global int *dst, struct foo bar) </br>
+{<br/>
+ dst[get\_global\_id(0)] = bar.x + bar.y;<br/>
+}
+</code>
+
+We use a _direct_ _load_ for `bar` with `bar.x` and `bar.y`. Values can be
+pushed into registers and we can replace the loads by register reads.
+
+### Case 2. Indirect loads -> we need to load the values from memory
+
+<code>
+struct foo { int x[16]; }; </br>
+\_\_kernel void case1(\_\_global int *dst, struct foo bar) </br>
+{<br/>
+ dst[get\_global\_id(0)] = bar.x[get\_local\_id(0)];<br/>
+}
+</code>
+
+We use an indirect load with `bar.x[get\_local\_id(0)]`. Here we need to issue a
+load from memory (well, actually, we could do a gather from registers, but it is
+not supported yet).
+
+### Case 3. Writes to arguments -> we need to spill the values to memory first
+
+<code>
+struct foo { int x[16]; }; </br>
+\_\_kernel void case1(\_\_global int *dst, struct foo bar) </br>
+{<br/>
+bar.x[0] = get\_global\_id(1);<br/>
+ dst[get\_global\_id(0)] = bar.x[get\_local\_id(0)];<br/>
+}
+</code>
+
+Here the values are written before being read. This causes some trouble since
+we are running in SIMD mode. Indeed, we only have *one* instance of the function
+arguments in memory, while *many* SIMD lanes and actually *many* hardware
+threads are running at the same time. This means that we cannot write the data
+to memory directly. We need to allocate a private area for each SIMD lane.
+
+In that case, we need to spill back the function arguments into memory. We spill
+once per SIMD lane. Then, we read from this private area rather than the
+function arguments directly.
+
+This analysis is partially done today in `src/ir/lowering.*pp`. We identify all
+the cases but only the case with constant pushing is fully implemented.
+Actually, the last two cases are easy to implement but this requires one or two
+days of work.
+
+Value and liveness analysis tools
+---------------------------------
+
+You may also notice that we provide a complete framework for value analysis
+(i.e. to figure out when a value or instruction destination is used and where
+the instruction sources come from). The code is in `src/ir/value.*pp`. Well,
+today, this code will burn a crazy amount of memory (use of std::set all over
+the place) but it at least provides the analysis required by many other passes.
+Compacting the data structures and using O(n) algorithms instead of the O(ln(n))
+ones are in the TODO list for sure :-)
+
+Finally, we also provide a liveness analysis tool which simply figures out which
+registers are alive at the end of each block (classically "live out" sets).
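+
+For reference, the classical backward data-flow computation behind such "live
+out" sets looks roughly like the following generic sketch (this is not the
+actual `src/ir/liveness.*pp` code; the block structure is a placeholder):
+
+<pre>
+#include <cstdint>
+#include <set>
+#include <vector>
+
+// Minimal per-block information: registers read before being written in the
+// block (use), registers written in the block (def), and the successors.
+struct BlockInfo {
+  std::set<uint32_t> use, def;
+  std::vector<int> successors; // indices of the successor blocks
+  std::set<uint32_t> liveIn, liveOut;
+};
+
+// Iterate the classical equations until a fixed point is reached:
+//   liveOut(B) = union of liveIn(S) for every successor S of B
+//   liveIn(B)  = use(B) union (liveOut(B) - def(B))
+void computeLiveness(std::vector<BlockInfo> &blocks) {
+  bool changed = true;
+  while (changed) {
+    changed = false;
+    for (BlockInfo &b : blocks) {
+      std::set<uint32_t> out;
+      for (int s : b.successors)
+        out.insert(blocks[s].liveIn.begin(), blocks[s].liveIn.end());
+      std::set<uint32_t> in = b.use;
+      for (uint32_t r : out)
+        if (!b.def.count(r)) in.insert(r);
+      if (in != b.liveIn || out != b.liveOut) {
+        b.liveIn.swap(in);
+        b.liveOut.swap(out);
+        changed = true;
+      }
+    }
+  }
+}
+</pre>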
+
+[Up](../README.html)
+
diff --git a/backend/doc/unstructured_branches.md b/backend/doc/unstructured_branches.md
new file mode 100644
index 0000000..52ee671
--- /dev/null
+++ b/backend/doc/unstructured_branches.md
@@ -0,0 +1,274 @@
+Unstructured Branches
+=====================
+
+A major challenge in making an OpenCL compiler is certainly to handle any kind
+of branch. Indeed, LLVM does not make any distinction between structured and
+unstructured branches. See [here](http://llvm.org/docs/LangRef.html) for a
+complete description of the LLVM assembly specification.
+
+C branching code is simply lowered down into the following instructions:
+
+- `ret` to return from the current function
+- `br` that, if predicated, possibly jumps to two destinations (one for the
+ taken branch and one for the other).
+- `switch` that implements the C switch/case construct.
+- `indirectbr` that implements a jump table
+- `invoke` and `resume` mostly used to handle exceptions
+
+Exceptions and jump tables are not supported in OpenCL. Switch cases can be
+lowered down to a sequence of if/else statements: using a divide and conquer
+approach, a switch/case can be dispatched in log(n) complexity, where n is the
+number of targets (see the small example below).
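+
+For instance, a four-way switch can be dispatched with a balanced if/else tree,
+so at most two comparisons instead of up to three for a linear if/else chain
+(C-style code, shown only to illustrate the idea; it assumes `x` is known to be
+in [0, 3]):
+
+<pre>
+int f0(), f1(), f2(), f3();
+
+/* Original form. */
+int dispatch_switch(int x) {
+  switch (x) {
+    case 0:  return f0();
+    case 1:  return f1();
+    case 2:  return f2();
+    default: return f3(); /* case 3 */
+  }
+}
+
+/* Lowered form: a balanced if/else tree (log2(4) = 2 comparisons). */
+int dispatch_lowered(int x) {
+  if (x <= 1)
+    return (x == 0) ? f0() : f1();
+  else
+    return (x == 2) ? f2() : f3();
+}
+</pre>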
+
+This leads us to properly implement `br` and `ret` instructions.
+
+Solution 1 - Using Gen structured branches
+------------------------------------------
+
+Gen structured branches are the following instructions:
+
+`if` `else` `endif` `break` `continue` `while` `brd` `brc`
+
+Transforming the LLVM IR code into structured code results in basically
+reverse-engineering the LLVM code into the original C code.
+Unfortunately, there are several key problems:
+
+- OpenCL supports the `goto` keyword, which may jump to an arbitrary location
+- LLVM can transform the control flow graph into any kind of form
+- Worse, a reducible control flow graph can be turned into an irreducible one
+by the optimizer.
+
+This can lead to complicated code transforms and basic block duplication. The
+specification allows the compiler to abort if an irreducible control flow is
+detected, but as an implementor it is quite awkward to abort the compilation
+because the optimizer turned a reducible CFG into an irreducible one. Using
+structured branches opens the door to many corner cases.
+
+The thing is that there exists a pretty elegant solution that can be almost
+seamlessly supported by Gen. This is the solution we retained.
+
+Solution 2 - Linearizing the control flow graph
+-----------------------------------------------
+
+The general problem is to map a general control flow graph to a SIMD machine.
+The problem is fairly well understood today. A recent research paper dedicated
+to OpenCL-like languages, which use the "SPMD" (single program multiple data)
+programming model, presents interesting insights about how to map such languages
+to SIMD architectures (see
+[here](http://www.cdl.uni-saarland.de/papers/karrenberg_opencl.pdf)).
+
+### Core idea
+
+- Linearizing the CFG initially consists in removing all forward branches and
+"replacing" them by predication. Indeed, the program will still be correct if
+you predicate instructions instead of using forward jumps. This is basically a
+control flow to data flow conversion.
+
+- Of course, removing all forward branches is inefficient. To improve that, we
+simply introduce "if conditions" at the head of each basic block to know if we
+run the basic block. If no lane is going to be activated in the basic block, we
+jump to another basic block where _potentially_ some lanes are going to be
+reactivated.
+
+Consider the following CFG:
+
+<pre>
+o-------o
+| |
+| 1 |---->-----o
+| | |
+o-------o |
+ | |
+ | |
+o-------o |
+| | |
+| 2 |---->-----------o
+| | | |
+o-------o | |
+ | | |
+ | | |
+ | o------o | |
+ | | | | |
+ | v | | |
+o-------o | | |
+| | | | |
+| 3 | | | |
+| | | | |
+o-------o | | |
+ | | | | |
+ | o------o | |
+ | | |
+o-------o | |
+| | | |
+| 4 |<---------o |
+| | |
+o-------o |
+ | |
+ | |
+o-------o |
+| | |
+| 5 |<----------------o
+| |
+o-------o
+</pre>
+
+Mapping it to a SIMD machine may seem challenging. Actually it is not too
+complicated. The problem is with the 2->5 jump. Indeed, we have to be sure that
+we are not missing any computation done in block 4.
+
+To do so:
+- Instead of jumping from block 2 to block 5, we jump from block 2 to block 4.
+- We implement a `JOIN` point on top of block 4. We check if any lane is going
+to be reactivated for the block 4. If not, we jump to block 5.
+
+This leads to the following linearized CFG:
+<pre>
+o-------o
+| |
+| 1 |---->-----o
+| | |
+o-------o |
+ | |
+ | |
+o-------o |
+| | |
+| 2 |---->-----------o
+| | | |
+o-------o | |
+ | | |
+ | | |
+ | o--<---o | |
+ | | | | |
+ | v | | |
+o-------o | | |
+| | | | |
+| 3 | ^ | |
+| | | | |
+o-------o | | |
+ | | | | |
+ | o-->---o | |
+ | | |
+o-------o | |
+| |==========|=====|====O
+| 4 |<---------|-----o |
+| |<---------o |
+o-------o |
+ | |
+ | |
+o-------o |
+| | |
+| 5 |<====================O
+| |
+o-------o
+</pre>
+
+There is a new jump from block 4 to block 5.
+
+### Implementation on Gen
+
+When using structured branches, Gen supports auto-masking, i.e. based on the
+branches which are taken, the control flow is properly handled and masks are
+automatically applied to all instructions.
+
+However, there is no similar support for unstructured branches. We therefore
+decided to mask instructions manually and use a single program flow. This is
+actually quite easy to do since Gen is able to predicate any branch.
+
+Now, how to evaluate the if conditions in an efficient way?
+
+The choice we made is to use *per-lane block IPs*: we store one short (16 bits)
+per SIMD lane in a regular 256-bit GPR (general purpose register). This
+"blockIP" register is used in the following way:
+
+At the beginning of each block, we compare the blockIP register with the ID of
+the block. The lane is going to be _activated_ if its blockIP is _smaller_ than
+the ID of the block. Otherwise, the lane is deactivated.
+
+Therefore, we build a flag register at the entry of each basic block with a
+single 16-wide uint16_t compare. If no lane is activated, a jump is performed to
+the next block where some lanes are going to be activated.
+
+Since these are regular jumps, we just use the `jmpi` instruction. With the help
+of predication, we can express all the different possibilities:
+
+- backward branches are always taken if _any_ of the lanes in the predicate is
+true. We just use `<+f0.0.anyh>` predication.
+- a forward branch is *not* taken if some of the lanes are going to be activated
+in the next block. We therefore compare the blockIPs with the ID of the _next_
+block. If all of them are strictly greater than the ID of the next block, we
+jump. We therefore use the `<+f0.0.allh>` predicate in that case.
+- `JOIN` points are even simpler. We simply jump if none of the lanes is
+activated. We therefore use the `<-f0.0.anyh>` predicate.
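+
+To make the mechanism concrete, here is a small scalar C++ sketch of the checks
+described above (on Gen this is a single 16-wide compare plus a predicated
+`jmpi`; the sketch is only an illustration, not generated code):
+
+<pre>
+#include <cstdint>
+
+static const int SIMD_WIDTH = 16;
+
+// One 16-bit block IP per SIMD lane, all stored in a single GRF on Gen.
+struct BlockIPs { uint16_t ip[SIMD_WIDTH]; };
+
+// Entry of a basic block: a lane is activated when its blockIP is smaller
+// than the ID of the block (see the text above).
+static uint16_t activeMask(const BlockIPs &bips, uint16_t blockID) {
+  uint16_t mask = 0;
+  for (int lane = 0; lane < SIMD_WIDTH; ++lane)
+    if (bips.ip[lane] < blockID)
+      mask |= uint16_t(1u << lane);
+  return mask;
+}
+
+// Backward branch: taken when any lane of the predicate is true
+// (the <+f0.0.anyh> case).
+static bool takeBackwardJump(uint16_t predicate) {
+  return predicate != 0;
+}
+
+// Forward branch: taken only when every lane has a blockIP strictly greater
+// than the ID of the next block (the <+f0.0.allh> case).
+static bool takeForwardJump(const BlockIPs &bips, uint16_t nextBlockID) {
+  for (int lane = 0; lane < SIMD_WIDTH; ++lane)
+    if (bips.ip[lane] <= nextBlockID)
+      return false;
+  return true;
+}
+
+// JOIN point: jump over the block when no lane is activated
+// (the <-f0.0.anyh> case).
+static bool takeJoinJump(const BlockIPs &bips, uint16_t blockID) {
+  return activeMask(bips, blockID) == 0;
+}
+</pre>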
+
+The complete encoding is done in `src/backend/gen_insn_selection.cpp`. Forward
+branches are handled by `SimpleSelection::emitForwardBranch`. Backward branches
+are handled by `SimpleSelection::emitBackwardBranch`. Finally, since `JOIN` points
+are at the top of each basic blocks, they are handled by
+`SimpleSelection::emitLabelInstruction`.
+
+### Computing `JOIN` points
+
+The last problem is to compute the `JOIN` points, i.e. we need to know whether
+we need to jump at the beginning of each block and, if we do, what the target of
+the branch is. The code is relatively straightforward and can be found in
+`src/backend/context.cpp`. The function is `Context::buildJIPs`.
+</br>
+Actually, the current implementation is not that elegant. A colleague, Thomas
+Raoux, has a simpler and better idea to handle it.
+
+### Advantages and drawbacks of the method
+
+- The method has one decisive advantage: it is simple and extremely robust. It
+can handle any kind of CFG (reducible or not) and does not require any
+transformation. The use of shorts is also not random: a 16-wide compare on
+shorts is issued in 2 cycles (so it is twice as fast as a 16-wide 32-bit
+compare).
+- The main drawback is performance. Even if this is not so bad, we still need
+more instructions than if we used structured branches. Mostly:
+ * one or two instructions for `JOIN` points
+ * three instructions for backward and forward jumps (two more than structured
+ branches, which just require the branch instruction itself)
+
+Note that all extra instructions are 16-bit instructions (i.e. they use shorts)
+so they will only cost 2 cycles anyway.
+
+The last point is that the Gen encoding restricts conditional modifiers and
+predicates to be the same in an instruction. This requires copying or
+recomputing the flag register for compares and selects, so one more instruction
+is required for these two instructions. Once again, this only costs 2 cycles.
+
+Remarks on `ret` instructions
+-----------------------------
+
+Since we can handle any kind of CFG, handling the return statements is
+relatively straightforward. We first create one return block at the end of the
+program. Then we replace all other returns by an unconditional jump to this
+block. The CFG linearization will take care of the rest.
+We then simply encode the (only one) return instruction as an End-Of-Thread
+message (EOT).
+
+Code examples
+-------------
+
+Some tests were written to assert the correctness of the CFG linearization and the
+code generation. They can be found in the _run-time_ code base here:
+
+`utest/compiler_if_else.cpp`
+
+`utest/compiler_lower_return0.cpp`
+
+`utest/compiler_lower_return1.cpp`
+
+`utest/compiler_lower_return2.cpp`
+
+`utest/compiler_short_scatter.cpp`
+
+`utest/compiler_unstructured_branch0.cpp`
+
+`utest/compiler_unstructured_branch1.cpp`
+
+`utest/compiler_unstructured_branch2.cpp`
+
+`utest/compiler_unstructured_branch3.cpp`
+
+[Up](../README.html)
+
diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
new file mode 100644
index 0000000..904b506
--- /dev/null
+++ b/backend/src/CMakeLists.txt
@@ -0,0 +1,114 @@
+add_subdirectory(llvm)
+
+macro (stringify TO_STRINGIFY_PATH TO_STRINGIFY_FILES)
+foreach (to_stringify_file ${TO_STRINGIFY_FILES})
+ set (input_file ${TO_STRINGIFY_PATH}/${to_stringify_file}.h)
+ set (output_file ${TO_STRINGIFY_PATH}/${to_stringify_file}_str.cpp)
+ set (string_header "\\\"string\\\"")
+ add_custom_command(
+ OUTPUT ${output_file}
+ COMMAND rm -rf ${output_file}
+ COMMAND echo "\\\#include ${string_header}" >> ${output_file}
+ COMMAND echo "namespace gbe {" >> ${output_file}
+ COMMAND echo "std::string ${to_stringify_file}_str = " >> ${output_file}
+ # Yeah!!! welcome to back slash hell
+ COMMAND cat ${input_file} |sed 's/\\\\/\\\\\\\\/g' | sed 's/\\\"/\\\\\\\"/g' | awk '{ printf \(\"\\"%s\\\\n\\"\\n\", $$0\) }' >> ${output_file}
+ COMMAND echo "\;" >> ${output_file}
+ COMMAND echo "}" >> ${output_file}
+ COMMAND echo "" >> ${output_file}
+ MAIN_DEPENDENCY ${input_file})
+endforeach (to_stringify_file)
+endmacro (stringify)
+
+set (TO_STRINGIFY_FILES ocl_stdlib)
+stringify ("${GBE_SOURCE_DIR}/src/" "${TO_STRINGIFY_FILES}")
+
+if (GBE_USE_BLOB)
+ set (GBE_SRC
+ blob.cpp
+ backend/gen/gen_mesa_disasm.c)
+else (GBE_USE_BLOB)
+ set (GBE_SRC
+ ocl_stdlib.h
+ ocl_stdlib_str.cpp
+ sys/vector.hpp
+ sys/hash_map.hpp
+ sys/map.hpp
+ sys/set.hpp
+ sys/intrusive_list.hpp
+ sys/intrusive_list.cpp
+ sys/exception.hpp
+ sys/assert.cpp
+ sys/assert.hpp
+ sys/alloc.cpp
+ sys/alloc.hpp
+ sys/mutex.cpp
+ sys/mutex.hpp
+ sys/platform.cpp
+ sys/platform.hpp
+ sys/cvar.cpp
+ sys/cvar.hpp
+ ir/context.cpp
+ ir/context.hpp
+ ir/profile.cpp
+ ir/profile.hpp
+ ir/type.cpp
+ ir/type.hpp
+ ir/unit.cpp
+ ir/unit.hpp
+ ir/constant.cpp
+ ir/constant.hpp
+ ir/instruction.cpp
+ ir/instruction.hpp
+ ir/liveness.cpp
+ ir/register.cpp
+ ir/register.hpp
+ ir/function.cpp
+ ir/function.hpp
+ ir/value.cpp
+ ir/value.hpp
+ ir/lowering.cpp
+ ir/lowering.hpp
+ backend/context.cpp
+ backend/context.hpp
+ backend/program.cpp
+ backend/program.hpp
+ backend/program.h
+ backend/gen/gen_mesa_disasm.c
+ backend/gen_insn_selection.cpp
+ backend/gen_insn_selection.hpp
+ backend/gen_insn_scheduling.cpp
+ backend/gen_insn_scheduling.hpp
+ backend/gen_reg_allocation.cpp
+ backend/gen_reg_allocation.hpp
+ backend/gen_context.cpp
+ backend/gen_context.hpp
+ backend/gen_program.cpp
+ backend/gen_program.hpp
+ backend/gen_program.h
+ backend/gen_defs.hpp
+ backend/gen_encoder.hpp
+ backend/gen_encoder.cpp)
+
+endif (GBE_USE_BLOB)
+
+include_directories (.)
+link_directories (${LLVM_LIBRARY_DIRS})
+include_directories(${LLVM_INCLUDE_DIRS})
+add_library (gbe SHARED ${GBE_SRC})
+include (${LLVM_DIR}/AddLLVMDefinitions.cmake)
+target_link_libraries (gbe
+ LLVMGenBackend
+ LLVMTransformUtils
+ LLVMCore
+ LLVMAnalysis
+ LLVMCodeGen
+ LLVMScalarOpts
+ LLVMSelectionDAG
+ LLVMSupport
+ LLVMAsmParser
+ LLVMBitReader)
+
+install (TARGETS gbe LIBRARY DESTINATION lib)
+install (FILES backend/program.h DESTINATION include/gen)
+
diff --git a/backend/src/Makefile b/backend/src/Makefile
new file mode 100644
index 0000000..ed032f1
--- /dev/null
+++ b/backend/src/Makefile
@@ -0,0 +1,4 @@
+TOP=..
+SUBDIRS=backend backend/gen backend/sim ir llvm sys .
+
+include $(TOP)/Makefile.shared
diff --git a/backend/src/all-in-one/blob.cpp b/backend/src/all-in-one/blob.cpp
new file mode 100644
index 0000000..fa4f730
--- /dev/null
+++ b/backend/src/all-in-one/blob.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file blob.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ *
+ * Compile the complete project from one file. This allows pretty aggressive
+ * optimization from the compiler and decreases the binary size
+ */
+
+#include "ocl_stdlib_str.cpp"
+#include "sys/assert.cpp"
+#include "sys/string.cpp"
+#include "sys/alloc.cpp"
+#include "sys/sysinfo.cpp"
+#include "sys/mutex.cpp"
+#include "sys/condition.cpp"
+#include "sys/platform.cpp"
+#include "sys/cvar.cpp"
+#include "ir/context.cpp"
+#include "ir/type.cpp"
+#include "ir/unit.cpp"
+#include "ir/constant.cpp"
+#include "ir/instruction.cpp"
+#include "ir/register.cpp"
+#include "ir/function.cpp"
+#include "ir/liveness.cpp"
+#include "ir/value.cpp"
+#include "ir/lowering.cpp"
+#include "ir/profile.cpp"
+#include "backend/context.cpp"
+#include "backend/program.cpp"
+#include "backend/sim_context.cpp"
+#include "backend/sim_program.cpp"
+#include "backend/sim/simulator_str.cpp"
+#include "backend/sim/sim_vector_str.cpp"
+#include "backend/gen_insn_selection.cpp"
+#include "backend/gen_reg_allocation.cpp"
+#include "backend/gen_context.cpp"
+#include "backend/gen_program.cpp"
+#include "backend/gen_encoder.cpp"
+
diff --git a/backend/src/backend/Makefile b/backend/src/backend/Makefile
new file mode 100644
index 0000000..7d124b1
--- /dev/null
+++ b/backend/src/backend/Makefile
@@ -0,0 +1,4 @@
+TOP=../..
+SUBDIRS=. gen
+
+include $(TOP)/Makefile.shared
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
new file mode 100644
index 0000000..1c78276
--- /dev/null
+++ b/backend/src/backend/context.cpp
@@ -0,0 +1,545 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file context.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#include "backend/context.hpp"
+#include "backend/program.hpp"
+#include "backend/gen_encoder.hpp"
+#include "ir/unit.hpp"
+#include "ir/function.hpp"
+#include "ir/profile.hpp"
+#include "ir/liveness.hpp"
+#include "ir/value.hpp"
+#include "sys/cvar.hpp"
+#include <algorithm>
+
+namespace gbe
+{
+ /*! Structure that keeps track of allocation in the register file. This is
+ * actually needed by Context (and not only by GenContext) because both
+ * simulator and hardware have to deal with constant pushing which uses the
+ * register file
+ *
+ * Since Gen is pretty flexible, we just maintain a free list for the
+ * register file (as a classical allocator) and coalesce blocks when required
+ */
+ class RegisterFilePartitioner
+ {
+ public:
+ RegisterFilePartitioner(void);
+ ~RegisterFilePartitioner(void);
+
+ /*! Allocate some memory in the register file. Return 0 if out-of-memory. By
+ * the way, zero is not a valid offset since r0 is always preallocated by
+ * the hardware. Note that we always use the left most block when
+ * allocating, so it makes sense for constant pushing
+ */
+ int16_t allocate(int16_t size, int16_t alignment);
+
+ /*! Free the given register file piece */
+ void deallocate(int16_t offset);
+
+ private:
+ /*! May need to make that run-time in the future */
+ static const int16_t RegisterFileSize = 4*KB;
+
+ /*! Double chained list of free spaces */
+ struct Block {
+ Block(int16_t offset, int16_t size) :
+ prev(NULL), next(NULL), offset(offset), size(size) {}
+ Block *prev, *next; //!< Previous and next free blocks
+ int16_t offset; //!< Where the free block starts
+ int16_t size; //!< Size of the free block
+ };
+
+ /*! Try to coalesce two blocks (left and right). They must be in that order.
+ * If the coalescing was done, the left block is deleted
+ */
+ void coalesce(Block *left, Block *right);
+ /*! Head of the free list */
+ Block *head;
+ /*! Handle free list element allocation */
+ DECL_POOL(Block, blockPool);
+ /*! Track allocated memory blocks <offset, size> */
+ map<int16_t, int16_t> allocatedBlocks;
+ /*! Use custom allocators */
+ GBE_CLASS(RegisterFilePartitioner);
+ };
+
+ RegisterFilePartitioner::RegisterFilePartitioner(void) {
+ // r0 is always set by the HW and used at the end by EOT
+ const int16_t offset = GEN_REG_SIZE;
+ const int16_t size = RegisterFileSize - offset;
+ head = this->newBlock(offset, size);
+ }
+
+ RegisterFilePartitioner::~RegisterFilePartitioner(void) {
+ while (this->head) {
+ Block *next = this->head->next;
+ this->deleteBlock(this->head);
+ this->head = next;
+ }
+ }
+
+ int16_t RegisterFilePartitioner::allocate(int16_t size, int16_t alignment)
+ {
+ // Make it simple and just use the first block we find
+ Block *list = head;
+ while (list) {
+ const int16_t aligned = ALIGN(list->offset, alignment);
+ const int16_t spaceOnLeft = aligned - list->offset;
+ const int16_t spaceOnRight = list->size - size - spaceOnLeft;
+
+ // Not enough space in this block
+ if (spaceOnRight < 0) {
+ list = list->next;
+ continue;
+ }
+ // Cool we can use this block
+ else {
+ Block *left = list->prev;
+ Block *right = list->next;
+
+ // If we left a hole on the left, create a new block
+ if (spaceOnLeft) {
+ Block *newBlock = this->newBlock(list->offset, spaceOnLeft);
+ if (left) {
+ left->next = newBlock;
+ newBlock->prev = left;
+ }
+ if (right) {
+ newBlock->next = right;
+ right->prev = newBlock;
+ }
+ left = newBlock;
+ }
+
+ // If we left a hole on the right, create a new block as well
+ if (spaceOnRight) {
+ Block *newBlock = this->newBlock(aligned + size, spaceOnRight);
+ if (left) {
+ left->next = newBlock;
+ newBlock->prev = left;
+ }
+ if (right) {
+ right->prev = newBlock;
+ newBlock->next = right;
+ }
+ right = newBlock;
+ }
+
+ // Chain both successors and predecessors when the entire block was
+ // allocated
+ if (spaceOnLeft == 0 && spaceOnRight == 0) {
+ if (left) left->next = right;
+ if (right) right->prev = left;
+ }
+
+ // Update the head of the free blocks
+ if (list == head) {
+ if (left)
+ head = left;
+ else if (right)
+ head = right;
+ else
+ head = NULL;
+ }
+
+ // Free the block and check the consistency
+ this->deleteBlock(list);
+ if (head && head->next) GBE_ASSERT(head->next->prev == head);
+
+ // Track the allocation to retrieve the size later
+ allocatedBlocks.insert(std::make_pair(aligned, size));
+
+ // We have a valid offset now
+ return aligned;
+ }
+ }
+ return 0;
+ }
+
+ void RegisterFilePartitioner::deallocate(int16_t offset)
+ {
+ // Retrieve the size in the allocation map
+ auto it = allocatedBlocks.find(offset);
+ GBE_ASSERT(it != allocatedBlocks.end());
+ const int16_t size = it->second;
+
+ // Find the two blocks where to insert the new block
+ Block *list = head, *prev = NULL;
+ while (list != NULL) {
+ if (list->offset > offset)
+ break;
+ prev = list;
+ list = list->next;
+ }
+
+ // Create the block and insert it
+ Block *newBlock = this->newBlock(offset, size);
+ if (prev) {
+ GBE_ASSERT(prev->offset + prev->size <= offset);
+ prev->next = newBlock;
+ newBlock->prev = prev;
+ }
+ if (list) {
+ GBE_ASSERT(offset + size <= list->offset);
+ list->prev = newBlock;
+ newBlock->next = list;
+ }
+
+ // There were no block anymore
+ if (prev == NULL && list == NULL)
+ this->head = newBlock;
+ else {
+ // Coalesce the blocks if possible
+ this->coalesce(prev, newBlock);
+ this->coalesce(newBlock, list);
+ }
+
+ // Do not track this allocation anymore
+ allocatedBlocks.erase(it);
+ }
+
+ void RegisterFilePartitioner::coalesce(Block *left, Block *right) {
+ if (left == NULL || right == NULL) return;
+ GBE_ASSERT(left->offset < right->offset);
+ GBE_ASSERT(left->next == right);
+ GBE_ASSERT(right->prev == left);
+ if (left->offset + left->size == right->offset) {
+ right->offset = left->offset;
+ right->size += left->size;
+ if (left->prev) left->prev->next = right;
+ right->prev = left->prev;
+ if (left == this->head)
+ this->head = right;
+ this->deleteBlock(left);
+ }
+ }
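
The split arithmetic in allocate() above is easiest to follow with concrete numbers. The standalone sketch below mirrors it with hypothetical values (a 64-byte free block at offset 96 and a 16-byte request aligned to 32 bytes), assuming ALIGN rounds an offset up to the next multiple of a power-of-two alignment; it is only an illustration, not code from this patch.

#include <cstdint>
#include <cstdio>

// Round 'offset' up to the next multiple of a power-of-two 'alignment'
// (what ALIGN is assumed to do in the allocator above).
static int16_t alignUp(int16_t offset, int16_t alignment) {
  return (offset + alignment - 1) & ~(alignment - 1);
}

int main() {
  // Hypothetical free block [96, 160) and a 16-byte request aligned to 32 bytes.
  const int16_t blockOffset = 96, blockSize = 64;
  const int16_t reqSize = 16, reqAlign = 32;

  const int16_t aligned      = alignUp(blockOffset, reqAlign);     // 96, already aligned
  const int16_t spaceOnLeft  = aligned - blockOffset;              // 0  -> no hole on the left
  const int16_t spaceOnRight = blockSize - reqSize - spaceOnLeft;  // 48 -> hole [112, 160) on the right

  if (spaceOnRight < 0)
    std::printf("block too small, move to the next free block\n");
  else
    std::printf("allocate [%d, %d), left hole %d bytes, right hole %d bytes\n",
                aligned, aligned + reqSize, spaceOnLeft, spaceOnRight);
  return 0;
}
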
+
+ ///////////////////////////////////////////////////////////////////////////
+ // Generic Context (shared by the simulator and the HW context)
+ ///////////////////////////////////////////////////////////////////////////
+ IVAR(OCL_SIMD_WIDTH, 8, 16, 32);
+
+ Context::Context(const ir::Unit &unit, const std::string &name) :
+ unit(unit), fn(*unit.getFunction(name)), name(name), liveness(NULL), dag(NULL)
+ {
+ GBE_ASSERT(unit.getPointerSize() == ir::POINTER_32_BITS);
+ this->liveness = GBE_NEW(ir::Liveness, const_cast<ir::Function&>(fn));
+ this->dag = GBE_NEW(ir::FunctionDAG, *this->liveness);
+ this->partitioner = GBE_NEW_NO_ARG(RegisterFilePartitioner);
+ if (fn.getSimdWidth() == 0)
+ this->simdWidth = nextHighestPowerOf2(OCL_SIMD_WIDTH);
+ else
+ this->simdWidth = fn.getSimdWidth();
+ }
+
+ Context::~Context(void) {
+ GBE_SAFE_DELETE(this->partitioner);
+ GBE_SAFE_DELETE(this->dag);
+ GBE_SAFE_DELETE(this->liveness);
+ }
+
+ Kernel *Context::compileKernel(void) {
+ this->kernel = this->allocateKernel();
+ this->kernel->simdWidth = this->simdWidth;
+ this->buildPatchList();
+ this->buildArgList();
+ this->buildUsedLabels();
+ this->buildJIPs();
+ this->buildStack();
+ this->handleSLM();
+ if (this->emitCode() == false) {
+ GBE_DELETE(this->kernel);
+ this->kernel = NULL;
+ }
+ return this->kernel;
+ }
+
+ int16_t Context::allocate(int16_t size, int16_t alignment) {
+ return partitioner->allocate(size, alignment);
+ }
+
+ void Context::deallocate(int16_t offset) { partitioner->deallocate(offset); }
+
+ void Context::buildStack(void) {
+ const auto &stackUse = dag->getUse(ir::ocl::stackptr);
+ if (stackUse.size() == 0) // no stack is used if stackptr is unused
+ return;
+ // Be sure that the stack pointer is set
+ GBE_ASSERT(this->kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) >= 0);
+ this->kernel->stackSize = 1*KB; // XXX compute that in a better way
+ }
+
+ void Context::newCurbeEntry(gbe_curbe_type value,
+ uint32_t subValue,
+ uint32_t size,
+ uint32_t alignment)
+ {
+ alignment = alignment == 0 ? size : alignment;
+ const uint32_t offset = partitioner->allocate(size, alignment);
+ GBE_ASSERT(offset >= GEN_REG_SIZE);
+ kernel->patches.push_back(PatchInfo(value, subValue, offset - GEN_REG_SIZE));
+ kernel->curbeSize = std::max(kernel->curbeSize, offset + size - GEN_REG_SIZE);
+ }
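
The offsets handled by newCurbeEntry() are relative to the start of the register file, while the patch list stores them relative to the start of the curbe, i.e. past r0. A small sketch of that arithmetic with hypothetical numbers, under the assumption that GEN_REG_SIZE is 32 bytes:

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t GEN_REG_SIZE = 32u;   // assumed size of one GRF register (r0)
  uint32_t curbeSize = 0u;

  // Suppose the partitioner returns offset 64 for a 32-byte, 32-byte aligned entry.
  const uint32_t offset = 64u, size = 32u;

  const uint32_t patchOffset = offset - GEN_REG_SIZE;             // 32: value stored in the patch list
  curbeSize = std::max(curbeSize, offset + size - GEN_REG_SIZE);  // 64: curbe now covers this entry

  std::printf("patch offset %u, curbe size %u\n", patchOffset, curbeSize);
  return 0;
}
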
+
+ void Context::buildPatchList(void) {
+ const uint32_t ptrSize = unit.getPointerSize() == ir::POINTER_32_BITS ? 4u : 8u;
+ kernel->curbeSize = 0u;
+
+ // We insert the block IP mask first
+ this->newCurbeEntry(GBE_CURBE_BLOCK_IP, 0, this->simdWidth*sizeof(uint16_t));
+
+ // Go over the arguments and find the related patch locations
+ const uint32_t argNum = fn.argNum();
+ for (uint32_t argID = 0u; argID < argNum; ++argID) {
+ const ir::FunctionArgument &arg = fn.getArg(argID);
+ // For pointers and values, we have nothing to do. We just push the values
+ if (arg.type == ir::FunctionArgument::GLOBAL_POINTER ||
+ arg.type == ir::FunctionArgument::LOCAL_POINTER ||
+ arg.type == ir::FunctionArgument::CONSTANT_POINTER ||
+ arg.type == ir::FunctionArgument::VALUE ||
+ arg.type == ir::FunctionArgument::STRUCTURE)
+ this->newCurbeEntry(GBE_CURBE_KERNEL_ARGUMENT, argID, arg.size, ptrSize);
+ }
+
+ // Already inserted registers go here
+ set<ir::Register> specialRegs;
+
+ const size_t localIDSize = sizeof(uint32_t) * this->simdWidth;
+ this->newCurbeEntry(GBE_CURBE_LOCAL_ID_X, 0, localIDSize);
+ this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Y, 0, localIDSize);
+ this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Z, 0, localIDSize);
+ specialRegs.insert(ir::ocl::lid0);
+ specialRegs.insert(ir::ocl::lid1);
+ specialRegs.insert(ir::ocl::lid2);
+
+ // Go over all the instructions and find the special registers we need
+ // to push
+#define INSERT_REG(SPECIAL_REG, PATCH, WIDTH) \
+ if (reg == ir::ocl::SPECIAL_REG) { \
+ if (specialRegs.find(reg) != specialRegs.end()) continue; \
+ this->newCurbeEntry(GBE_CURBE_##PATCH, 0, ptrSize * WIDTH); \
+ } else
+
+ bool useStackPtr = false;
+ fn.foreachInstruction([&](const ir::Instruction &insn) {
+ const uint32_t srcNum = insn.getSrcNum();
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+ const ir::Register reg = insn.getSrc(srcID);
+ if (fn.isSpecialReg(reg) == false) continue;
+ if (specialRegs.contains(reg) == true) continue;
+ if (reg == ir::ocl::stackptr) useStackPtr = true;
+ INSERT_REG(lsize0, LOCAL_SIZE_X, 1)
+ INSERT_REG(lsize1, LOCAL_SIZE_Y, 1)
+ INSERT_REG(lsize2, LOCAL_SIZE_Z, 1)
+ INSERT_REG(gsize0, GLOBAL_SIZE_X, 1)
+ INSERT_REG(gsize1, GLOBAL_SIZE_Y, 1)
+ INSERT_REG(gsize2, GLOBAL_SIZE_Z, 1)
+ INSERT_REG(goffset0, GLOBAL_OFFSET_X, 1)
+ INSERT_REG(goffset1, GLOBAL_OFFSET_Y, 1)
+ INSERT_REG(goffset2, GLOBAL_OFFSET_Z, 1)
+ INSERT_REG(numgroup0, GROUP_NUM_X, 1)
+ INSERT_REG(numgroup1, GROUP_NUM_Y, 1)
+ INSERT_REG(numgroup2, GROUP_NUM_Z, 1)
+ INSERT_REG(stackptr, STACK_POINTER, this->simdWidth)
+ do {} while (0);
+ specialRegs.insert(reg);
+ }
+ });
+#undef INSERT_REG
+
+ // Insert the number of threads
+ this->newCurbeEntry(GBE_CURBE_THREAD_NUM, 0, sizeof(uint32_t));
+
+ // Insert the stack buffer if used
+ if (useStackPtr)
+ this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize);
+
+ // After this point the vector is immutable. Sorting it will make
+ // searches faster
+ std::sort(kernel->patches.begin(), kernel->patches.end());
+
+ // Align it on 32 bytes properly
+ kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE);
+ }
+
+ void Context::buildArgList(void) {
+ kernel->argNum = fn.argNum();
+ if (kernel->argNum)
+ kernel->args = GBE_NEW_ARRAY_NO_ARG(KernelArgument, kernel->argNum);
+ else
+ kernel->args = NULL;
+ for (uint32_t argID = 0; argID < kernel->argNum; ++argID) {
+ const auto &arg = fn.getArg(argID);
+ switch (arg.type) {
+ case ir::FunctionArgument::VALUE:
+ case ir::FunctionArgument::STRUCTURE:
+ kernel->args[argID].type = GBE_ARG_VALUE;
+ kernel->args[argID].size = arg.size;
+ break;
+ case ir::FunctionArgument::GLOBAL_POINTER:
+ kernel->args[argID].type = GBE_ARG_GLOBAL_PTR;
+ kernel->args[argID].size = sizeof(void*);
+ break;
+ case ir::FunctionArgument::CONSTANT_POINTER:
+ kernel->args[argID].type = GBE_ARG_CONSTANT_PTR;
+ kernel->args[argID].size = sizeof(void*);
+ break;
+ case ir::FunctionArgument::LOCAL_POINTER:
+ kernel->args[argID].type = GBE_ARG_LOCAL_PTR;
+ kernel->args[argID].size = 0;
+ break;
+ case ir::FunctionArgument::IMAGE:
+ kernel->args[argID].type = GBE_ARG_IMAGE;
+ kernel->args[argID].size = sizeof(void*);
+ break;
+ }
+ }
+ }
+
+ void Context::buildUsedLabels(void) {
+ usedLabels.clear();
+ fn.foreachInstruction([this](const ir::Instruction &insn) {
+ using namespace ir;
+ if (insn.getOpcode() != OP_BRA) return;
+ const LabelIndex index = cast<BranchInstruction>(insn).getLabelIndex();
+ usedLabels.insert(index);
+ });
+ }
+
+ void Context::buildJIPs(void) {
+ using namespace ir;
+
+ // Linearly store the branch target for each block and its own label
+ const LabelIndex noTarget(fn.labelNum());
+ vector<std::pair<LabelIndex, LabelIndex>> braTargets;
+ int32_t curr = 0, blockNum = fn.blockNum();
+ braTargets.resize(blockNum);
+
+ // If some blocks are unused we mark them as such by setting their own label
+ // as "invalid" (== noTarget)
+ for (auto &bb : braTargets) bb = std::make_pair(noTarget, noTarget);
+ fn.foreachBlock([&](const BasicBlock &bb) {
+ const LabelIndex ownLabel = bb.getLabelIndex();
+ const Instruction *last = bb.getLastInstruction();
+ if (last->getOpcode() != OP_BRA)
+ braTargets[curr++] = std::make_pair(ownLabel, noTarget);
+ else {
+ const BranchInstruction *bra = cast<BranchInstruction>(last);
+ braTargets[curr++] = std::make_pair(ownLabel, bra->getLabelIndex());
+ }
+ });
+
+ // Backward jumps are special. We must insert the label of the next block
+ // when we hit the "DO", i.e. the target label of the backward branch (as in
+ // do { } while). So, we store the bwd jumps per target
+ // XXX does not use custom allocator
+ std::multimap<LabelIndex, LabelIndex> bwdTargets;
+ for (int32_t blockID = 0; blockID < blockNum; ++blockID) {
+ const LabelIndex ownLabel = braTargets[blockID].first;
+ const LabelIndex target = braTargets[blockID].second;
+ if (ownLabel == noTarget) continue; // unused block
+ if (target == noTarget) continue; // no branch
+ if (target <= ownLabel) { // This is a backward jump
+ // The last block is just "RET", so a block with a backward branch cannot be the last one
+ GBE_ASSERT(blockID < blockNum - 1);
+ const LabelIndex fallThrough = braTargets[blockID+1].first;
+ bwdTargets.insert(std::make_pair(target, fallThrough));
+ }
+ }
+
+ // Stores the current forward targets
+ set<LabelIndex> fwdTargets;
+
+ // Now retraverse the blocks and figure out all JIPs
+ for (int32_t blockID = 0; blockID < blockNum; ++blockID) {
+ const LabelIndex ownLabel = braTargets[blockID].first;
+ const LabelIndex target = braTargets[blockID].second;
+ const BasicBlock &bb = fn.getBlock(ownLabel);
+ const Instruction *label = bb.getFirstInstruction();
+ const Instruction *bra = bb.getLastInstruction();
+
+ // Expires the branches that point to us (if any)
+ auto it = fwdTargets.find(ownLabel);
+ if (it != fwdTargets.end()) fwdTargets.erase(it);
+
+ // Insert the fall through of the bwd branches that point to us if any
+ auto ii = bwdTargets.equal_range(ownLabel);
+ for (auto it = ii.first; it != ii.second; ++it)
+ fwdTargets.insert(it->second);
+
+ // If there is an outstanding forward branch, compute a JIP for the label
+ auto lower = fwdTargets.lower_bound(LabelIndex(0));
+ GBE_ASSERT(label->isMemberOf<LabelInstruction>() == true);
+ if (lower != fwdTargets.end())
+ JIPs.insert(std::make_pair(label, *lower));
+
+ // Handle special cases and backward branches first
+ if (ownLabel == noTarget) continue; // unused block
+ if (target == noTarget) continue; // no branch at all
+ GBE_ASSERT(bra->isMemberOf<BranchInstruction>() == true);
+ if (target <= ownLabel) { // bwd branch: we always jump
+ JIPs.insert(std::make_pair(bra, LabelIndex(target)));
+ continue;
+ }
+
+ // This is a forward jump, register it and get the JIP
+ fwdTargets.insert(target);
+ auto jip = fwdTargets.lower_bound(LabelIndex(0));
+ JIPs.insert(std::make_pair(bra, *jip));
+ }
+ }
+
+ void Context::handleSLM(void) {
+ const bool useSLM = fn.getUseSLM();
+ kernel->useSLM = useSLM;
+ }
+
+ bool Context::isScalarReg(const ir::Register &reg) const {
+ GBE_ASSERT(fn.getProfile() == ir::Profile::PROFILE_OCL);
+ if (fn.getArg(reg) != NULL) return true;
+ if (fn.getPushLocation(reg) != NULL) return true;
+ if (reg == ir::ocl::groupid0 ||
+ reg == ir::ocl::groupid1 ||
+ reg == ir::ocl::groupid2 ||
+ reg == ir::ocl::barrierid ||
+ reg == ir::ocl::threadn ||
+ reg == ir::ocl::numgroup0 ||
+ reg == ir::ocl::numgroup1 ||
+ reg == ir::ocl::numgroup2 ||
+ reg == ir::ocl::lsize0 ||
+ reg == ir::ocl::lsize1 ||
+ reg == ir::ocl::lsize2 ||
+ reg == ir::ocl::gsize0 ||
+ reg == ir::ocl::gsize1 ||
+ reg == ir::ocl::gsize2 ||
+ reg == ir::ocl::goffset0 ||
+ reg == ir::ocl::goffset1 ||
+ reg == ir::ocl::goffset2)
+ return true;
+ return false;
+ }
+
+} /* namespace gbe */
+
diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp
new file mode 100644
index 0000000..55a63a7
--- /dev/null
+++ b/backend/src/backend/context.hpp
@@ -0,0 +1,130 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __GBE_CONTEXT_HPP__
+#define __GBE_CONTEXT_HPP__
+
+#include "ir/instruction.hpp"
+#include "backend/program.h"
+#include "sys/set.hpp"
+#include "sys/map.hpp"
+#include "sys/platform.hpp"
+#include <string>
+
+namespace gbe {
+namespace ir {
+
+ class Unit; // Contains the complete program
+ class Function; // We compile a function into a kernel
+ class Liveness; // Describes liveness of each ir function register
+ class FunctionDAG; // Describes the instruction dependencies
+
+} /* namespace ir */
+} /* namespace gbe */
+
+namespace gbe
+{
+ class Kernel; // context creates Kernel
+ class RegisterFilePartitioner; // Partition register file for reg allocation
+
+ /*! Context is the helper structure to build the Gen ISA or simulation code
+ * from GenIR
+ */
+ class Context : public NonCopyable
+ {
+ public:
+ /*! Create a new context. name is the name of the function we want to
+ * compile
+ */
+ Context(const ir::Unit &unit, const std::string &name);
+ /*! Release all allocated resources */
+ virtual ~Context(void);
+ /*! Compile the code */
+ Kernel *compileKernel(void);
+ /*! Tells if the label is used */
+ INLINE bool isLabelUsed(ir::LabelIndex index) const {
+ return usedLabels.contains(index);
+ }
+ /*! Get the function graph */
+ INLINE const ir::FunctionDAG &getFunctionDAG(void) const { return *dag; }
+ /*! Get the liveness information */
+ INLINE const ir::Liveness &getLiveness(void) const { return *liveness; }
+ /*! Tells if the register is used */
+ bool isRegUsed(const ir::Register &reg) const;
+ /*! Indicate if a register is scalar or not */
+ bool isScalarReg(const ir::Register &reg) const;
+ /*! Get the kernel we are currently compiling */
+ INLINE Kernel *getKernel(void) const { return this->kernel; }
+ /*! Get the function we are currently compiling */
+ INLINE const ir::Function &getFunction(void) const { return this->fn; }
+ /*! Get the target label index for the given instruction */
+ INLINE ir::LabelIndex getLabelIndex(const ir::Instruction *insn) const {
+ GBE_ASSERT(JIPs.find(insn) != JIPs.end());
+ return JIPs.find(insn)->second;
+ }
+ /*! Only GOTO and some LABEL instructions may have JIPs */
+ INLINE bool hasJIP(const ir::Instruction *insn) const {
+ return JIPs.find(insn) != JIPs.end();
+ }
+ /*! Allocate some memory in the register file */
+ int16_t allocate(int16_t size, int16_t alignment);
+ /*! Deallocate previously allocated memory */
+ void deallocate(int16_t offset);
+ protected:
+ /*! Build the instruction stream. Return false if failed */
+ virtual bool emitCode(void) = 0;
+ /*! Allocate a new empty kernel (to be implemented) */
+ virtual Kernel *allocateKernel(void) = 0;
+ /*! Look if a stack is needed and allocate it */
+ void buildStack(void);
+ /*! Build the curbe patch list for the given kernel */
+ void buildPatchList(void);
+ /*! Build the list of arguments to set to launch the kernel */
+ void buildArgList(void);
+ /*! Build the sets of used labels */
+ void buildUsedLabels(void);
+ /*! Build JIPs for each branch and possibly for labels. A JIP can differ
+ * from the branch target due to unstructured branches
+ */
+ void buildJIPs(void);
+ /*! Configure SLM use if needed */
+ void handleSLM(void);
+ /*! Insert a new entry with the given size in the curbe and record the
+ * corresponding patch entry
+ */
+ void newCurbeEntry(gbe_curbe_type value, uint32_t subValue, uint32_t size, uint32_t alignment = 0);
+ /*! Map each branch or label instruction to its target label index */
+ typedef map<const ir::Instruction*, ir::LabelIndex> JIPMap;
+ const ir::Unit &unit; //!< Unit that contains the kernel
+ const ir::Function &fn; //!< Function to compile
+ std::string name; //!< Name of the kernel to compile
+ Kernel *kernel; //!< Kernel we are building
+ ir::Liveness *liveness; //!< Liveness info for the variables
+ ir::FunctionDAG *dag; //!< Graph of values on the function
+ RegisterFilePartitioner *partitioner; //!< Handle register file partitioning
+ set<ir::LabelIndex> usedLabels; //!< Set of all used labels
+ JIPMap JIPs; //!< Where to jump all labels/branches
+ uint32_t simdWidth; //!< Number of lanes per HW thread
+ GBE_CLASS(Context); //!< Use custom allocators
+ };
+
+} /* namespace gbe */
+
+#endif /* __GBE_CONTEXT_HPP__ */
+
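
Context leaves emitCode() and allocateKernel() pure virtual, so a backend plugs into compileKernel() by subclassing it, as GenContext does further down in this patch. The standalone sketch below only illustrates the shape of that template-method contract with invented names (PipelineBase, FakeBackend); it does not use the real Kernel or Context classes.

#include <cstdio>

// The base class drives a fixed sequence of steps (mirroring compileKernel())
// and defers the actual code emission to a virtual hook, as Context does.
struct PipelineBase {
  virtual ~PipelineBase() {}
  bool compile() {
    std::puts("build patch list / arg list / used labels / JIPs / stack / SLM");
    return this->emitCode();            // backend-specific step
  }
protected:
  virtual bool emitCode() = 0;          // what a real backend (e.g. GenContext) provides
};

// Invented backend: stands in for GenContext purely to show the contract.
struct FakeBackend : PipelineBase {
  bool emitCode() override { std::puts("emit ISA here"); return true; }
};

int main() {
  FakeBackend backend;
  return backend.compile() ? 0 : 1;
}
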
diff --git a/backend/src/backend/gen/Makefile b/backend/src/backend/gen/Makefile
new file mode 100644
index 0000000..6ba1627
--- /dev/null
+++ b/backend/src/backend/gen/Makefile
@@ -0,0 +1,4 @@
+TOP=../../..
+SUBDIRS=.
+
+include $(TOP)/Makefile.shared
diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
new file mode 100644
index 0000000..31d51f4
--- /dev/null
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -0,0 +1,1146 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/*
+ * Copyright © 2008 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. The copyright holders make no representations
+ * about the suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <assert.h>
+
+#include "backend/gen_defs.hpp"
+
+static const struct {
+ const char *name;
+ int nsrc;
+ int ndst;
+} opcode[128] = {
+ [GEN_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
+ [GEN_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
+ [GEN_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
+ [GEN_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 },
+ [GEN_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 },
+ [GEN_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
+ [GEN_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
+ [GEN_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
+
+ [GEN_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 },
+ [GEN_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
+
+ [GEN_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
+
+ [GEN_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
+ [GEN_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 },
+ [GEN_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
+ [GEN_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 0, .ndst = 0 },
+ [GEN_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
+ [GEN_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
+ [GEN_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
+ [GEN_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
+ [GEN_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
+ [GEN_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
+ [GEN_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
+ [GEN_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
+ [GEN_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
+ [GEN_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
+ [GEN_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
+ [GEN_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
+ [GEN_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
+};
+
+static const char *conditional_modifier[16] = {
+ [GEN_CONDITIONAL_NONE] = "",
+ [GEN_CONDITIONAL_Z] = ".e",
+ [GEN_CONDITIONAL_NZ] = ".ne",
+ [GEN_CONDITIONAL_G] = ".g",
+ [GEN_CONDITIONAL_GE] = ".ge",
+ [GEN_CONDITIONAL_L] = ".l",
+ [GEN_CONDITIONAL_LE] = ".le",
+ [GEN_CONDITIONAL_R] = ".r",
+ [GEN_CONDITIONAL_O] = ".o",
+ [GEN_CONDITIONAL_U] = ".u",
+};
+
+static const char *negate[2] = {
+ [0] = "",
+ [1] = "-",
+};
+
+static const char *_abs[2] = {
+ [0] = "",
+ [1] = "(abs)",
+};
+
+static const char *vert_stride[16] = {
+ [0] = "0",
+ [1] = "1",
+ [2] = "2",
+ [3] = "4",
+ [4] = "8",
+ [5] = "16",
+ [6] = "32",
+ [15] = "VxH",
+};
+
+static const char *width[8] = {
+ [0] = "1",
+ [1] = "2",
+ [2] = "4",
+ [3] = "8",
+ [4] = "16",
+};
+
+static const char *horiz_stride[4] = {
+ [0] = "0",
+ [1] = "1",
+ [2] = "2",
+ [3] = "4"
+};
+
+static const char *chan_sel[4] = {
+ [0] = "x",
+ [1] = "y",
+ [2] = "z",
+ [3] = "w",
+};
+
+static const char *debug_ctrl[2] = {
+ [0] = "",
+ [1] = ".breakpoint"
+};
+
+static const char *saturate[2] = {
+ [0] = "",
+ [1] = ".sat"
+};
+
+static const char *accwr[2] = {
+ [0] = "",
+ [1] = "AccWrEnable"
+};
+
+static const char *wectrl[2] = {
+ [0] = "WE_normal",
+ [1] = "WE_all"
+};
+
+static const char *exec_size[8] = {
+ [0] = "1",
+ [1] = "2",
+ [2] = "4",
+ [3] = "8",
+ [4] = "16",
+ [5] = "32"
+};
+
+static const char *pred_inv[2] = {
+ [0] = "+",
+ [1] = "-"
+};
+
+static const char *pred_ctrl_align16[16] = {
+ [1] = "",
+ [2] = ".x",
+ [3] = ".y",
+ [4] = ".z",
+ [5] = ".w",
+ [6] = ".any4h",
+ [7] = ".all4h",
+};
+
+static const char *pred_ctrl_align1[16] = {
+ [1] = "",
+ [2] = ".anyv",
+ [3] = ".allv",
+ [4] = ".any2h",
+ [5] = ".all2h",
+ [6] = ".any4h",
+ [7] = ".all4h",
+ [8] = ".any8h",
+ [9] = ".all8h",
+ [10] = ".any16h",
+ [11] = ".all16h",
+};
+
+static const char *thread_ctrl[4] = {
+ [0] = "",
+ [2] = "switch"
+};
+
+static const char *dep_ctrl[4] = {
+ [0] = "",
+ [1] = "NoDDClr",
+ [2] = "NoDDChk",
+ [3] = "NoDDClr,NoDDChk",
+};
+
+static const char *mask_ctrl[4] = {
+ [0] = "",
+ [1] = "nomask",
+};
+
+static const char *access_mode[2] = {
+ [0] = "align1",
+ [1] = "align16",
+};
+
+static const char *reg_encoding[8] = {
+ [0] = "UD",
+ [1] = "D",
+ [2] = "UW",
+ [3] = "W",
+ [4] = "UB",
+ [5] = "B",
+ [7] = "F"
+};
+
+int reg_type_size[8] = {
+ [0] = 4,
+ [1] = 4,
+ [2] = 2,
+ [3] = 2,
+ [4] = 1,
+ [5] = 1,
+ [7] = 4
+};
+
+static const char *reg_file[4] = {
+ [0] = "A",
+ [1] = "g",
+ [2] = "m",
+ [3] = "imm",
+};
+
+static const char *writemask[16] = {
+ [0x0] = ".",
+ [0x1] = ".x",
+ [0x2] = ".y",
+ [0x3] = ".xy",
+ [0x4] = ".z",
+ [0x5] = ".xz",
+ [0x6] = ".yz",
+ [0x7] = ".xyz",
+ [0x8] = ".w",
+ [0x9] = ".xw",
+ [0xa] = ".yw",
+ [0xb] = ".xyw",
+ [0xc] = ".zw",
+ [0xd] = ".xzw",
+ [0xe] = ".yzw",
+ [0xf] = "",
+};
+
+static const char *end_of_thread[2] = {
+ [0] = "",
+ [1] = "EOT"
+};
+
+static const char *target_function_gen6[16] = {
+ [GEN_SFID_NULL] = "null",
+ [GEN_SFID_MATH] = "math",
+ [GEN_SFID_SAMPLER] = "sampler",
+ [GEN_SFID_MESSAGE_GATEWAY] = "gateway",
+ [GEN_SFID_URB] = "urb",
+ [GEN_SFID_THREAD_SPAWNER] = "thread_spawner",
+ [GEN6_SFID_DATAPORT_SAMPLER_CACHE] = "sampler",
+ [GEN6_SFID_DATAPORT_RENDER_CACHE] = "render",
+ [GEN6_SFID_DATAPORT_CONSTANT_CACHE] = "const",
+ [GEN_SFID_DATAPORT_DATA_CACHE] = "data"
+};
+
+static const char *math_function[16] = {
+ [GEN_MATH_FUNCTION_INV] = "inv",
+ [GEN_MATH_FUNCTION_LOG] = "log",
+ [GEN_MATH_FUNCTION_EXP] = "exp",
+ [GEN_MATH_FUNCTION_SQRT] = "sqrt",
+ [GEN_MATH_FUNCTION_RSQ] = "rsq",
+ [GEN_MATH_FUNCTION_SIN] = "sin",
+ [GEN_MATH_FUNCTION_COS] = "cos",
+ [GEN_MATH_FUNCTION_SINCOS] = "sincos",
+ [GEN_MATH_FUNCTION_TAN] = "tan",
+ [GEN_MATH_FUNCTION_POW] = "pow",
+ [GEN_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod",
+ [GEN_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv",
+ [GEN_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod",
+};
+
+static const char *math_saturate[2] = {
+ [0] = "",
+ [1] = "sat"
+};
+
+static const char *math_signed[2] = {
+ [0] = "",
+ [1] = "signed"
+};
+
+static const char *math_scalar[2] = {
+ [0] = "",
+ [1] = "scalar"
+};
+
+static const char *math_precision[2] = {
+ [0] = "",
+ [1] = "partial_precision"
+};
+
+static int column;
+
+static int string (FILE *file, const char *string)
+{
+ fputs (string, file);
+ column += strlen (string);
+ return 0;
+}
+
+static int format (FILE *f, const char *format, ...)
+{
+ char buf[1024];
+ va_list args;
+ va_start (args, format);
+
+ vsnprintf (buf, sizeof (buf) - 1, format, args);
+ va_end (args);
+ string (f, buf);
+ return 0;
+}
+
+static int newline (FILE *f)
+{
+ putc ('\n', f);
+ column = 0;
+ return 0;
+}
+
+static int pad (FILE *f, int c)
+{
+ do
+ string (f, " ");
+ while (column < c);
+ return 0;
+}
+
+static int control (FILE *file, const char *name, const char *ctrl[], uint32_t id, int *space)
+{
+ if (!ctrl[id]) {
+ fprintf (file, "*** invalid %s value %d ",
+ name, id);
+ return 1;
+ }
+ if (ctrl[id][0])
+ {
+ if (space && *space)
+ string (file, " ");
+ string (file, ctrl[id]);
+ if (space)
+ *space = 1;
+ }
+ return 0;
+}
+
+static int print_opcode (FILE *file, int id)
+{
+ if (!opcode[id].name) {
+ format (file, "*** invalid opcode value %d ", id);
+ return 1;
+ }
+ string (file, opcode[id].name);
+ return 0;
+}
+
+static int reg (FILE *file, uint32_t _reg_file, uint32_t _reg_nr)
+{
+ int err = 0;
+
+ if (_reg_file == GEN_ARCHITECTURE_REGISTER_FILE) {
+ switch (_reg_nr & 0xf0) {
+ case GEN_ARF_NULL:
+ string (file, "null");
+ return -1;
+ case GEN_ARF_ADDRESS:
+ format (file, "a%d", _reg_nr & 0x0f);
+ break;
+ case GEN_ARF_ACCUMULATOR:
+ format (file, "acc%d", _reg_nr & 0x0f);
+ break;
+ case GEN_ARF_FLAG:
+ format (file, "f%d", _reg_nr & 0x0f);
+ break;
+ case GEN_ARF_MASK:
+ format (file, "mask%d", _reg_nr & 0x0f);
+ break;
+ case GEN_ARF_MASK_STACK:
+ format (file, "msd%d", _reg_nr & 0x0f);
+ break;
+ case GEN_ARF_STATE:
+ format (file, "sr%d", _reg_nr & 0x0f);
+ break;
+ case GEN_ARF_CONTROL:
+ format (file, "cr%d", _reg_nr & 0x0f);
+ break;
+ case GEN_ARF_NOTIFICATION_COUNT:
+ format (file, "n%d", _reg_nr & 0x0f);
+ break;
+ case GEN_ARF_IP:
+ string (file, "ip");
+ return -1;
+ break;
+ default:
+ format (file, "ARF%d", _reg_nr);
+ break;
+ }
+ } else {
+ err |= control (file, "src reg file", reg_file, _reg_file, NULL);
+ format (file, "%d", _reg_nr);
+ }
+ return err;
+}
+
+static int dest (FILE *file, const struct GenInstruction *inst)
+{
+ int err = 0;
+
+ if (inst->header.access_mode == GEN_ALIGN_1)
+ {
+ if (inst->bits1.da1.dest_address_mode == GEN_ADDRESS_DIRECT)
+ {
+ err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr);
+ if (err == -1)
+ return 0;
+ if (inst->bits1.da1.dest_subreg_nr)
+ format (file, ".%d", inst->bits1.da1.dest_subreg_nr /
+ reg_type_size[inst->bits1.da1.dest_reg_type]);
+ format (file, "<%d>", inst->bits1.da1.dest_horiz_stride);
+ err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
+ }
+ else
+ {
+ string (file, "g[a0");
+ if (inst->bits1.ia1.dest_subreg_nr)
+ format (file, ".%d", inst->bits1.ia1.dest_subreg_nr /
+ reg_type_size[inst->bits1.ia1.dest_reg_type]);
+ if (inst->bits1.ia1.dest_indirect_offset)
+ format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
+ string (file, "]");
+ format (file, "<%d>", inst->bits1.ia1.dest_horiz_stride);
+ err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL);
+ }
+ }
+ else
+ {
+ if (inst->bits1.da16.dest_address_mode == GEN_ADDRESS_DIRECT)
+ {
+ err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr);
+ if (err == -1)
+ return 0;
+ if (inst->bits1.da16.dest_subreg_nr)
+ format (file, ".%d", inst->bits1.da16.dest_subreg_nr /
+ reg_type_size[inst->bits1.da16.dest_reg_type]);
+ string (file, "<1>");
+ err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
+ err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
+ }
+ else
+ {
+ err = 1;
+ string (file, "Indirect align16 address mode not supported");
+ }
+ }
+
+ return 0;
+}
+
+static int dest_3src (FILE *file, const struct GenInstruction *inst)
+{
+ int err = 0;
+ const uint32_t reg_file = GEN_GENERAL_REGISTER_FILE;
+
+ err |= reg (file, reg_file, inst->bits1.da3src.dest_reg_nr);
+ if (err == -1)
+ return 0;
+ if (inst->bits1.da3src.dest_subreg_nr)
+ format (file, ".%d", inst->bits1.da3src.dest_subreg_nr);
+ string (file, "<1>");
+ err |= control (file, "writemask", writemask, inst->bits1.da3src.dest_writemask, NULL);
+ err |= control (file, "dest reg encoding", reg_encoding, GEN_TYPE_F, NULL);
+
+ return 0;
+}
+
+static int src_align1_region (FILE *file,
+ uint32_t _vert_stride, uint32_t _width, uint32_t _horiz_stride)
+{
+ int err = 0;
+ string (file, "<");
+ err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
+ string (file, ",");
+ err |= control (file, "width", width, _width, NULL);
+ string (file, ",");
+ err |= control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL);
+ string (file, ">");
+ return err;
+}
+
+static int src_da1 (FILE *file, uint32_t type, uint32_t _reg_file,
+ uint32_t _vert_stride, uint32_t _width, uint32_t _horiz_stride,
+ uint32_t reg_num, uint32_t sub_reg_num, uint32_t __abs, uint32_t _negate)
+{
+ int err = 0;
+ err |= control (file, "negate", negate, _negate, NULL);
+ err |= control (file, "abs", _abs, __abs, NULL);
+
+ err |= reg (file, _reg_file, reg_num);
+ if (err == -1)
+ return 0;
+ if (sub_reg_num)
+ format (file, ".%d", sub_reg_num / reg_type_size[type]); /* use formal style like spec */
+ src_align1_region (file, _vert_stride, _width, _horiz_stride);
+ err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+ return err;
+}
+
+static int src_ia1 (FILE *file,
+ uint32_t type,
+ uint32_t _reg_file,
+ int32_t _addr_imm,
+ uint32_t _addr_subreg_nr,
+ uint32_t _negate,
+ uint32_t __abs,
+ uint32_t _addr_mode,
+ uint32_t _horiz_stride,
+ uint32_t _width,
+ uint32_t _vert_stride)
+{
+ int err = 0;
+ err |= control (file, "negate", negate, _negate, NULL);
+ err |= control (file, "abs", _abs, __abs, NULL);
+
+ string (file, "g[a0");
+ if (_addr_subreg_nr)
+ format (file, ".%d", _addr_subreg_nr);
+ if (_addr_imm)
+ format (file, " %d", _addr_imm);
+ string (file, "]");
+ src_align1_region (file, _vert_stride, _width, _horiz_stride);
+ err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+ return err;
+}
+
+static int src_da16 (FILE *file,
+ uint32_t _reg_type,
+ uint32_t _reg_file,
+ uint32_t _vert_stride,
+ uint32_t _reg_nr,
+ uint32_t _subreg_nr,
+ uint32_t __abs,
+ uint32_t _negate,
+ uint32_t swz_x,
+ uint32_t swz_y,
+ uint32_t swz_z,
+ uint32_t swz_w)
+{
+ int err = 0;
+ err |= control (file, "negate", negate, _negate, NULL);
+ err |= control (file, "abs", _abs, __abs, NULL);
+
+ err |= reg (file, _reg_file, _reg_nr);
+ if (err == -1)
+ return 0;
+ if (_subreg_nr)
+ /* bit4 for subreg number byte addressing. Make this mean the same as
+ in the da1 case, so the output looks consistent. */
+ format (file, ".%d", 16 / reg_type_size[_reg_type]);
+ string (file, "<");
+ err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
+ string (file, ",4,1>");
+ /*
+ * Three kinds of swizzle display:
+ * identity - nothing printed
+ * 1->all - print the single channel
+ * 1->1 - print the mapping
+ */
+ if (swz_x == GEN_CHANNEL_X &&
+ swz_y == GEN_CHANNEL_Y &&
+ swz_z == GEN_CHANNEL_Z &&
+ swz_w == GEN_CHANNEL_W)
+ {
+ ;
+ }
+ else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ }
+ else
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ err |= control (file, "channel select", chan_sel, swz_y, NULL);
+ err |= control (file, "channel select", chan_sel, swz_z, NULL);
+ err |= control (file, "channel select", chan_sel, swz_w, NULL);
+ }
+ err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
+ return err;
+}
+
+static int src0_3src (FILE *file, const struct GenInstruction *inst)
+{
+ int err = 0;
+ uint32_t swz_x = (inst->bits2.da3src.src0_swizzle >> 0) & 0x3;
+ uint32_t swz_y = (inst->bits2.da3src.src0_swizzle >> 2) & 0x3;
+ uint32_t swz_z = (inst->bits2.da3src.src0_swizzle >> 4) & 0x3;
+ uint32_t swz_w = (inst->bits2.da3src.src0_swizzle >> 6) & 0x3;
+
+ err |= control (file, "negate", negate, inst->bits1.da3src.src0_negate, NULL);
+ err |= control (file, "abs", _abs, inst->bits1.da3src.src0_abs, NULL);
+
+ err |= reg (file, GEN_GENERAL_REGISTER_FILE, inst->bits2.da3src.src0_reg_nr);
+ if (err == -1)
+ return 0;
+ if (inst->bits2.da3src.src0_subreg_nr)
+ format (file, ".%d", inst->bits2.da3src.src0_subreg_nr);
+ string (file, "<4,1,1>");
+ err |= control (file, "src da16 reg type", reg_encoding,
+ GEN_TYPE_F, NULL);
+ /*
+ * Three kinds of swizzle display:
+ * identity - nothing printed
+ * 1->all - print the single channel
+ * 1->1 - print the mapping
+ */
+ if (swz_x == GEN_CHANNEL_X &&
+ swz_y == GEN_CHANNEL_Y &&
+ swz_z == GEN_CHANNEL_Z &&
+ swz_w == GEN_CHANNEL_W)
+ {
+ ;
+ }
+ else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ }
+ else
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ err |= control (file, "channel select", chan_sel, swz_y, NULL);
+ err |= control (file, "channel select", chan_sel, swz_z, NULL);
+ err |= control (file, "channel select", chan_sel, swz_w, NULL);
+ }
+ return err;
+}
+
+static int src1_3src (FILE *file, const struct GenInstruction *inst)
+{
+ int err = 0;
+ uint32_t swz_x = (inst->bits2.da3src.src1_swizzle >> 0) & 0x3;
+ uint32_t swz_y = (inst->bits2.da3src.src1_swizzle >> 2) & 0x3;
+ uint32_t swz_z = (inst->bits2.da3src.src1_swizzle >> 4) & 0x3;
+ uint32_t swz_w = (inst->bits2.da3src.src1_swizzle >> 6) & 0x3;
+ uint32_t src1_subreg_nr = (inst->bits2.da3src.src1_subreg_nr_low |
+ (inst->bits3.da3src.src1_subreg_nr_high << 2));
+
+ err |= control (file, "negate", negate, inst->bits1.da3src.src1_negate,
+ NULL);
+ err |= control (file, "abs", _abs, inst->bits1.da3src.src1_abs, NULL);
+
+ err |= reg (file, GEN_GENERAL_REGISTER_FILE,
+ inst->bits3.da3src.src1_reg_nr);
+ if (err == -1)
+ return 0;
+ if (src1_subreg_nr)
+ format (file, ".%d", src1_subreg_nr);
+ string (file, "<4,1,1>");
+ err |= control (file, "src da16 reg type", reg_encoding,
+ GEN_TYPE_F, NULL);
+ /*
+ * Three kinds of swizzle display:
+ * identity - nothing printed
+ * 1->all - print the single channel
+ * 1->1 - print the mapping
+ */
+ if (swz_x == GEN_CHANNEL_X &&
+ swz_y == GEN_CHANNEL_Y &&
+ swz_z == GEN_CHANNEL_Z &&
+ swz_w == GEN_CHANNEL_W)
+ {
+ ;
+ }
+ else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ }
+ else
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ err |= control (file, "channel select", chan_sel, swz_y, NULL);
+ err |= control (file, "channel select", chan_sel, swz_z, NULL);
+ err |= control (file, "channel select", chan_sel, swz_w, NULL);
+ }
+ return err;
+}
+
+
+static int src2_3src (FILE *file, const struct GenInstruction *inst)
+{
+ int err = 0;
+ uint32_t swz_x = (inst->bits3.da3src.src2_swizzle >> 0) & 0x3;
+ uint32_t swz_y = (inst->bits3.da3src.src2_swizzle >> 2) & 0x3;
+ uint32_t swz_z = (inst->bits3.da3src.src2_swizzle >> 4) & 0x3;
+ uint32_t swz_w = (inst->bits3.da3src.src2_swizzle >> 6) & 0x3;
+
+ err |= control (file, "negate", negate, inst->bits1.da3src.src2_negate,
+ NULL);
+ err |= control (file, "abs", _abs, inst->bits1.da3src.src2_abs, NULL);
+
+ err |= reg (file, GEN_GENERAL_REGISTER_FILE,
+ inst->bits3.da3src.src2_reg_nr);
+ if (err == -1)
+ return 0;
+ if (inst->bits3.da3src.src2_subreg_nr)
+ format (file, ".%d", inst->bits3.da3src.src2_subreg_nr);
+ string (file, "<4,1,1>");
+ err |= control (file, "src da16 reg type", reg_encoding,
+ GEN_TYPE_F, NULL);
+ /*
+ * Three kinds of swizzle display:
+ * identity - nothing printed
+ * 1->all - print the single channel
+ * 1->1 - print the mapping
+ */
+ if (swz_x == GEN_CHANNEL_X &&
+ swz_y == GEN_CHANNEL_Y &&
+ swz_z == GEN_CHANNEL_Z &&
+ swz_w == GEN_CHANNEL_W)
+ {
+ ;
+ }
+ else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ }
+ else
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ err |= control (file, "channel select", chan_sel, swz_y, NULL);
+ err |= control (file, "channel select", chan_sel, swz_z, NULL);
+ err |= control (file, "channel select", chan_sel, swz_w, NULL);
+ }
+ return err;
+}
+
+static int imm (FILE *file, uint32_t type, const struct GenInstruction *inst) {
+ switch (type) {
+ case GEN_TYPE_UD:
+ format (file, "0x%08xUD", inst->bits3.ud);
+ break;
+ case GEN_TYPE_D:
+ format (file, "%dD", inst->bits3.d);
+ break;
+ case GEN_TYPE_UW:
+ format (file, "0x%04xUW", (uint16_t) inst->bits3.ud);
+ break;
+ case GEN_TYPE_W:
+ format (file, "%dW", (int16_t) inst->bits3.d);
+ break;
+ case GEN_TYPE_UB:
+ format (file, "0x%02xUB", (int8_t) inst->bits3.ud);
+ break;
+ case GEN_TYPE_VF:
+ format (file, "Vector Float");
+ break;
+ case GEN_TYPE_V:
+ format (file, "0x%08xV", inst->bits3.ud);
+ break;
+ case GEN_TYPE_F:
+ format (file, "%-gF", inst->bits3.f);
+ }
+ return 0;
+}
+
+static int src0 (FILE *file, const struct GenInstruction *inst)
+{
+ if (inst->bits1.da1.src0_reg_file == GEN_IMMEDIATE_VALUE)
+ return imm (file, inst->bits1.da1.src0_reg_type,
+ inst);
+ else if (inst->header.access_mode == GEN_ALIGN_1)
+ {
+ if (inst->bits2.da1.src0_address_mode == GEN_ADDRESS_DIRECT)
+ {
+ return src_da1 (file,
+ inst->bits1.da1.src0_reg_type,
+ inst->bits1.da1.src0_reg_file,
+ inst->bits2.da1.src0_vert_stride,
+ inst->bits2.da1.src0_width,
+ inst->bits2.da1.src0_horiz_stride,
+ inst->bits2.da1.src0_reg_nr,
+ inst->bits2.da1.src0_subreg_nr,
+ inst->bits2.da1.src0_abs,
+ inst->bits2.da1.src0_negate);
+ }
+ else
+ {
+ return src_ia1 (file,
+ inst->bits1.ia1.src0_reg_type,
+ inst->bits1.ia1.src0_reg_file,
+ inst->bits2.ia1.src0_indirect_offset,
+ inst->bits2.ia1.src0_subreg_nr,
+ inst->bits2.ia1.src0_negate,
+ inst->bits2.ia1.src0_abs,
+ inst->bits2.ia1.src0_address_mode,
+ inst->bits2.ia1.src0_horiz_stride,
+ inst->bits2.ia1.src0_width,
+ inst->bits2.ia1.src0_vert_stride);
+ }
+ }
+ else
+ {
+ if (inst->bits2.da16.src0_address_mode == GEN_ADDRESS_DIRECT)
+ {
+ return src_da16 (file,
+ inst->bits1.da16.src0_reg_type,
+ inst->bits1.da16.src0_reg_file,
+ inst->bits2.da16.src0_vert_stride,
+ inst->bits2.da16.src0_reg_nr,
+ inst->bits2.da16.src0_subreg_nr,
+ inst->bits2.da16.src0_abs,
+ inst->bits2.da16.src0_negate,
+ inst->bits2.da16.src0_swz_x,
+ inst->bits2.da16.src0_swz_y,
+ inst->bits2.da16.src0_swz_z,
+ inst->bits2.da16.src0_swz_w);
+ }
+ else
+ {
+ string (file, "Indirect align16 address mode not supported");
+ return 1;
+ }
+ }
+}
+
+static int src1 (FILE *file, const struct GenInstruction *inst)
+{
+ if (inst->bits1.da1.src1_reg_file == GEN_IMMEDIATE_VALUE)
+ return imm (file, inst->bits1.da1.src1_reg_type,
+ inst);
+ else if (inst->header.access_mode == GEN_ALIGN_1)
+ {
+ if (inst->bits3.da1.src1_address_mode == GEN_ADDRESS_DIRECT)
+ {
+ return src_da1 (file,
+ inst->bits1.da1.src1_reg_type,
+ inst->bits1.da1.src1_reg_file,
+ inst->bits3.da1.src1_vert_stride,
+ inst->bits3.da1.src1_width,
+ inst->bits3.da1.src1_horiz_stride,
+ inst->bits3.da1.src1_reg_nr,
+ inst->bits3.da1.src1_subreg_nr,
+ inst->bits3.da1.src1_abs,
+ inst->bits3.da1.src1_negate);
+ }
+ else
+ {
+ return src_ia1 (file,
+ inst->bits1.ia1.src1_reg_type,
+ inst->bits1.ia1.src1_reg_file,
+ inst->bits3.ia1.src1_indirect_offset,
+ inst->bits3.ia1.src1_subreg_nr,
+ inst->bits3.ia1.src1_negate,
+ inst->bits3.ia1.src1_abs,
+ inst->bits3.ia1.src1_address_mode,
+ inst->bits3.ia1.src1_horiz_stride,
+ inst->bits3.ia1.src1_width,
+ inst->bits3.ia1.src1_vert_stride);
+ }
+ }
+ else
+ {
+ if (inst->bits3.da16.src1_address_mode == GEN_ADDRESS_DIRECT)
+ {
+ return src_da16 (file,
+ inst->bits1.da16.src1_reg_type,
+ inst->bits1.da16.src1_reg_file,
+ inst->bits3.da16.src1_vert_stride,
+ inst->bits3.da16.src1_reg_nr,
+ inst->bits3.da16.src1_subreg_nr,
+ inst->bits3.da16.src1_abs,
+ inst->bits3.da16.src1_negate,
+ inst->bits3.da16.src1_swz_x,
+ inst->bits3.da16.src1_swz_y,
+ inst->bits3.da16.src1_swz_z,
+ inst->bits3.da16.src1_swz_w);
+ }
+ else
+ {
+ string (file, "Indirect align16 address mode not supported");
+ return 1;
+ }
+ }
+}
+
+static const int esize[6] = {
+ [0] = 1,
+ [1] = 2,
+ [2] = 4,
+ [3] = 8,
+ [4] = 16,
+ [5] = 32,
+};
+
+static int qtr_ctrl(FILE *file, const struct GenInstruction *inst)
+{
+ int qtr_ctl = inst->header.quarter_control;
+ int exec_size = esize[inst->header.execution_size];
+
+ if (exec_size == 8) {
+ switch (qtr_ctl) {
+ case 0:
+ string (file, " 1Q");
+ break;
+ case 1:
+ string (file, " 2Q");
+ break;
+ case 2:
+ string (file, " 3Q");
+ break;
+ case 3:
+ string (file, " 4Q");
+ break;
+ }
+ } else if (exec_size == 16){
+ if (qtr_ctl < 2)
+ string (file, " 1H");
+ else
+ string (file, " 2H");
+ }
+ return 0;
+}
+
+int gen_disasm (FILE *file, const void *opaque_insn)
+{
+ const struct GenInstruction *inst = (const struct GenInstruction *) opaque_insn;
+ int err = 0;
+ int space = 0;
+ int gen = 7;
+
+ if (inst->header.predicate_control) {
+ string (file, "(");
+ err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL);
+ string (file, "f0");
+ if (inst->bits2.da1.flag_reg_nr)
+ format (file, ".%d", inst->bits2.da1.flag_reg_nr);
+ if (inst->header.access_mode == GEN_ALIGN_1)
+ err |= control (file, "predicate control align1", pred_ctrl_align1,
+ inst->header.predicate_control, NULL);
+ else
+ err |= control (file, "predicate control align16", pred_ctrl_align16,
+ inst->header.predicate_control, NULL);
+ string (file, ") ");
+ }
+
+ err |= print_opcode (file, inst->header.opcode);
+ err |= control (file, "saturate", saturate, inst->header.saturate, NULL);
+ err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL);
+
+ if (inst->header.opcode == GEN_OPCODE_MATH) {
+ string (file, " ");
+ err |= control (file, "function", math_function,
+ inst->header.destreg_or_condmod, NULL);
+ } else if (inst->header.opcode != GEN_OPCODE_SEND &&
+ inst->header.opcode != GEN_OPCODE_SENDC)
+ err |= control (file, "conditional modifier", conditional_modifier,
+ inst->header.destreg_or_condmod, NULL);
+
+ if (inst->header.opcode != GEN_OPCODE_NOP) {
+ string (file, "(");
+ err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL);
+ string (file, ")");
+ }
+
+ if (inst->header.opcode == GEN_OPCODE_SEND && gen < 6)
+ format (file, " %d", inst->header.destreg_or_condmod);
+
+ if (opcode[inst->header.opcode].nsrc == 3) {
+ pad (file, 16);
+ err |= dest_3src (file, inst);
+
+ pad (file, 32);
+ err |= src0_3src (file, inst);
+
+ pad (file, 48);
+ err |= src1_3src (file, inst);
+
+ pad (file, 64);
+ err |= src2_3src (file, inst);
+ } else {
+ if (opcode[inst->header.opcode].ndst > 0) {
+ pad (file, 16);
+ err |= dest (file, inst);
+ } else if (gen >= 6 && (inst->header.opcode == GEN_OPCODE_IF ||
+ inst->header.opcode == GEN_OPCODE_ELSE ||
+ inst->header.opcode == GEN_OPCODE_ENDIF ||
+ inst->header.opcode == GEN_OPCODE_WHILE)) {
+ // XXX format (file, " %d", inst->bits1.branch_gen6.jump_count);
+ assert(0);
+ } else if (gen >= 6 && (inst->header.opcode == GEN_OPCODE_BREAK ||
+ inst->header.opcode == GEN_OPCODE_CONTINUE ||
+ inst->header.opcode == GEN_OPCODE_HALT)) {
+ // XXX format (file, " %d %d", inst->bits3.break_cont.uip, inst->bits3.break_cont.jip);
+ assert(0);
+ } else if (inst->header.opcode == GEN_OPCODE_JMPI) {
+ format (file, " %d", inst->bits3.d);
+ }
+
+ if (opcode[inst->header.opcode].nsrc > 0) {
+ pad (file, 32);
+ err |= src0 (file, inst);
+ }
+ if (opcode[inst->header.opcode].nsrc > 1) {
+ pad (file, 48);
+ err |= src1 (file, inst);
+ }
+ }
+
+ if (inst->header.opcode == GEN_OPCODE_SEND ||
+ inst->header.opcode == GEN_OPCODE_SENDC) {
+ enum GenMessageTarget target = inst->header.destreg_or_condmod;
+
+ newline (file);
+ pad (file, 16);
+ space = 0;
+
+ err |= control (file, "target function", target_function_gen6,
+ target, &space);
+
+ switch (target) {
+ case GEN_SFID_MATH:
+ err |= control (file, "math function", math_function,
+ inst->bits3.math_gen5.function, &space);
+ err |= control (file, "math saturate", math_saturate,
+ inst->bits3.math_gen5.saturate, &space);
+ err |= control (file, "math signed", math_signed,
+ inst->bits3.math_gen5.int_type, &space);
+ err |= control (file, "math scalar", math_scalar,
+ inst->bits3.math_gen5.data_type, &space);
+ err |= control (file, "math precision", math_precision,
+ inst->bits3.math_gen5.precision, &space);
+ break;
+ case GEN_SFID_SAMPLER:
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.sampler_gen7.bti,
+ inst->bits3.sampler_gen7.sampler,
+ inst->bits3.sampler_gen7.msg_type,
+ inst->bits3.sampler_gen7.simd_mode);
+ break;
+ case GEN_SFID_DATAPORT_DATA_CACHE:
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.gen7_untyped_rw.bti,
+ inst->bits3.gen7_untyped_rw.rgba,
+ inst->bits3.gen7_untyped_rw.simd_mode,
+ inst->bits3.gen7_untyped_rw.msg_type);
+ break;
+
+ default:
+ format (file, "unsupported target %d", target);
+ break;
+ }
+ if (space)
+ string (file, " ");
+ format (file, "mlen %d", inst->bits3.generic_gen5.msg_length);
+ format (file, " rlen %d", inst->bits3.generic_gen5.response_length);
+ }
+ pad (file, 64);
+ if (inst->header.opcode != GEN_OPCODE_NOP) {
+ string (file, "{");
+ space = 1;
+ err |= control(file, "access mode", access_mode, inst->header.access_mode, &space);
+ if (gen >= 6)
+ err |= control (file, "write enable control", wectrl, inst->header.mask_control, &space);
+ else
+ err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
+ err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
+
+ err |= qtr_ctrl (file, inst);
+ err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
+ if (gen >= 6)
+ err |= control (file, "acc write control", accwr, inst->header.acc_wr_control, &space);
+ if (inst->header.opcode == GEN_OPCODE_SEND ||
+ inst->header.opcode == GEN_OPCODE_SENDC)
+ err |= control (file, "end of thread", end_of_thread,
+ inst->bits3.generic_gen5.end_of_thread, &space);
+ if (space)
+ string (file, " ");
+ string (file, "}");
+ }
+ string (file, ";");
+ newline (file);
+ return err;
+}
+
diff --git a/backend/src/backend/gen/gen_mesa_disasm.h b/backend/src/backend/gen/gen_mesa_disasm.h
new file mode 100644
index 0000000..6185061
--- /dev/null
+++ b/backend/src/backend/gen/gen_mesa_disasm.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file gen_mesa_disasm.h
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ *
+ * To decode and print one Gen ISA instruction. The code is directly taken
+ * from Mesa
+ */
+
+#ifndef __GBE_GEN_MESA_DISASM_H__
+#define __GBE_GEN_MESA_DISASM_H__
+
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+extern int gen_disasm(FILE *file, const void *opaque_insn);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __GBE_GEN_MESA_DISASM_H__ */
+
+
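
A minimal, hypothetical driver for the entry point declared above might look like the sketch below; it feeds a zero-filled 16-byte buffer (the assumed size of one 128-bit Gen instruction) to the printer, which simply reports it as an invalid opcode. A real caller would pass bytes produced by the encoder, and backend/src would need to be on the include path.

#include <cstdint>
#include <cstdio>
#include "backend/gen/gen_mesa_disasm.h"

int main() {
  uint8_t raw[16] = {0};    // placeholder bytes, not a valid Gen instruction
  gen_disasm(stdout, raw);  // prints a "*** invalid opcode value 0" diagnostic
  return 0;
}
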
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
new file mode 100644
index 0000000..59e738c
--- /dev/null
+++ b/backend/src/backend/gen_context.cpp
@@ -0,0 +1,286 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file gen_context.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "backend/gen_context.hpp"
+#include "backend/gen_program.hpp"
+#include "backend/gen_defs.hpp"
+#include "backend/gen_encoder.hpp"
+#include "backend/gen_insn_selection.hpp"
+#include "backend/gen_insn_scheduling.hpp"
+#include "backend/gen_reg_allocation.hpp"
+#include "backend/gen/gen_mesa_disasm.h"
+#include "ir/function.hpp"
+#include "sys/cvar.hpp"
+#include <cstring>
+
+namespace gbe
+{
+ ///////////////////////////////////////////////////////////////////////////
+ // GenContext implementation
+ ///////////////////////////////////////////////////////////////////////////
+ GenContext::GenContext(const ir::Unit &unit,
+ const std::string &name,
+ bool limitRegisterPressure) :
+ Context(unit, name), limitRegisterPressure(limitRegisterPressure)
+ {
+ this->p = GBE_NEW(GenEncoder, simdWidth, 7); // XXX handle more than Gen7
+ this->sel = GBE_NEW(Selection, *this);
+ this->ra = GBE_NEW(GenRegAllocator, *this);
+ }
+
+ GenContext::~GenContext(void) {
+ GBE_DELETE(this->ra);
+ GBE_DELETE(this->sel);
+ GBE_DELETE(this->p);
+ }
+
+ void GenContext::emitInstructionStream(void) {
+ // Emit Gen ISA
+ for (auto &block : *sel->blockList)
+ for (auto &insn : block.insnList) {
+ const uint32_t opcode = insn.opcode;
+ p->push();
+ // no more virtual registers at this point of the code generation
+ GBE_ASSERT(insn.state.physicalFlag);
+ p->curr = insn.state;
+ switch (opcode) {
+#define DECL_SELECTION_IR(OPCODE, FAMILY) \
+ case SEL_OP_##OPCODE: this->emit##FAMILY(insn); break;
+#include "backend/gen_insn_selection.hxx"
+#undef DECL_INSN
+ }
+ p->pop();
+ }
+ }
+
+ void GenContext::patchBranches(void) {
+ using namespace ir;
+ for (auto pair : branchPos2) {
+ const LabelIndex label = pair.first;
+ const int32_t insnID = pair.second;
+ const int32_t targetID = labelPos.find(label)->second;
+ p->patchJMPI(insnID, (targetID-insnID-1) * 2);
+ }
+ }
+
+ void GenContext::emitStackPointer(void) {
+ using namespace ir;
+
+ // Only emit stack pointer computation if we use a stack
+ if (kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) <= 0)
+ return;
+
+ // Check that everything is consistent in the kernel code
+ const uint32_t perLaneSize = kernel->getStackSize();
+ const uint32_t perThreadSize = perLaneSize * this->simdWidth;
+ const int32_t offset = GEN_REG_SIZE + kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
+ GBE_ASSERT(perLaneSize > 0);
+ GBE_ASSERT(isPowerOf<2>(perLaneSize) == true);
+ GBE_ASSERT(isPowerOf<2>(perThreadSize) == true);
+
+ // Use shifts rather than muls which are limited to 32x16 bit sources
+ const uint32_t perLaneShift = logi2(perLaneSize);
+ const uint32_t perThreadShift = logi2(perThreadSize);
+ const GenRegister selStackPtr = this->simdWidth == 8 ?
+ GenRegister::ud8grf(ir::ocl::stackptr) :
+ GenRegister::ud16grf(ir::ocl::stackptr);
+ const GenRegister stackptr = ra->genReg(selStackPtr);
+ const uint32_t nr = offset / GEN_REG_SIZE;
+ const uint32_t subnr = (offset % GEN_REG_SIZE) / sizeof(uint32_t);
+ const GenRegister bufferptr = GenRegister::ud1grf(nr, subnr);
+
+ // We compute the per-lane stack pointer here
+ p->push();
+ p->curr.execWidth = 1;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->SHR(GenRegister::ud1grf(126,0), GenRegister::ud1grf(0,5), GenRegister::immud(10));
+ p->curr.execWidth = this->simdWidth;
+ p->SHL(stackptr, stackptr, GenRegister::immud(perLaneShift));
+ p->curr.execWidth = 1;
+ p->SHL(GenRegister::ud1grf(126,0), GenRegister::ud1grf(126,0), GenRegister::immud(perThreadShift));
+ p->curr.execWidth = this->simdWidth;
+ p->ADD(stackptr, stackptr, bufferptr);
+ p->ADD(stackptr, stackptr, GenRegister::ud1grf(126,0));
+ p->pop();
+ }
+
+ void GenContext::emitLabelInstruction(const SelectionInstruction &insn) {
+ const ir::LabelIndex label(insn.index);
+ this->labelPos.insert(std::make_pair(label, p->store.size()));
+ }
+
+ void GenContext::emitUnaryInstruction(const SelectionInstruction &insn) {
+ const GenRegister dst = ra->genReg(insn.dst(0));
+ const GenRegister src = ra->genReg(insn.src(0));
+ switch (insn.opcode) {
+ case SEL_OP_MOV: p->MOV(dst, src); break;
+ case SEL_OP_NOT: p->NOT(dst, src); break;
+ case SEL_OP_RNDD: p->RNDD(dst, src); break;
+ case SEL_OP_RNDU: p->RNDU(dst, src); break;
+ case SEL_OP_RNDE: p->RNDE(dst, src); break;
+ case SEL_OP_RNDZ: p->RNDZ(dst, src); break;
+ default: NOT_IMPLEMENTED;
+ }
+ }
+
+ void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
+ const GenRegister dst = ra->genReg(insn.dst(0));
+ const GenRegister src0 = ra->genReg(insn.src(0));
+ const GenRegister src1 = ra->genReg(insn.src(1));
+ switch (insn.opcode) {
+ case SEL_OP_SEL: p->SEL(dst, src0, src1); break;
+ case SEL_OP_AND: p->AND(dst, src0, src1); break;
+ case SEL_OP_OR: p->OR (dst, src0, src1); break;
+ case SEL_OP_XOR: p->XOR(dst, src0, src1); break;
+ case SEL_OP_SHR: p->SHR(dst, src0, src1); break;
+ case SEL_OP_SHL: p->SHL(dst, src0, src1); break;
+ case SEL_OP_RSR: p->RSR(dst, src0, src1); break;
+ case SEL_OP_RSL: p->RSL(dst, src0, src1); break;
+ case SEL_OP_ASR: p->ASR(dst, src0, src1); break;
+ case SEL_OP_ADD: p->ADD(dst, src0, src1); break;
+ case SEL_OP_MUL: p->MUL(dst, src0, src1); break;
+ case SEL_OP_MACH: p->MACH(dst, src0, src1); break;
+ default: NOT_IMPLEMENTED;
+ }
+ }
+
+ void GenContext::emitTernaryInstruction(const SelectionInstruction &insn) {
+ const GenRegister dst = ra->genReg(insn.dst(0));
+ const GenRegister src0 = ra->genReg(insn.src(0));
+ const GenRegister src1 = ra->genReg(insn.src(1));
+ const GenRegister src2 = ra->genReg(insn.src(2));
+ switch (insn.opcode) {
+ case SEL_OP_MAD: p->MAD(dst, src0, src1, src2); break;
+ default: NOT_IMPLEMENTED;
+ }
+ }
+
+ void GenContext::emitNoOpInstruction(const SelectionInstruction &insn) {
+ NOT_IMPLEMENTED;
+ }
+
+ void GenContext::emitWaitInstruction(const SelectionInstruction &insn) {
+ p->WAIT();
+ }
+
+ void GenContext::emitBarrierInstruction(const SelectionInstruction &insn) {
+ const GenRegister src = ra->genReg(insn.src(0));
+ p->BARRIER(src);
+ }
+
+ void GenContext::emitMathInstruction(const SelectionInstruction &insn) {
+ const GenRegister dst = ra->genReg(insn.dst(0));
+ const GenRegister src0 = ra->genReg(insn.src(0));
+ const uint32_t function = insn.extra.function;
+ if (insn.srcNum == 2) {
+ const GenRegister src1 = ra->genReg(insn.src(1));
+ p->MATH(dst, function, src0, src1);
+ } else
+ p->MATH(dst, function, src0);
+ }
+
+ void GenContext::emitCompareInstruction(const SelectionInstruction &insn) {
+ const GenRegister src0 = ra->genReg(insn.src(0));
+ const GenRegister src1 = ra->genReg(insn.src(1));
+ if (insn.opcode == SEL_OP_CMP)
+ p->CMP(insn.extra.function, src0, src1);
+ else {
+ GBE_ASSERT(insn.opcode == SEL_OP_SEL_CMP);
+ const GenRegister dst = ra->genReg(insn.dst(0));
+ p->SEL_CMP(insn.extra.function, dst, src0, src1);
+ }
+ }
+
+ void GenContext::emitJumpInstruction(const SelectionInstruction &insn) {
+ const ir::LabelIndex label(insn.index);
+ const GenRegister src = ra->genReg(insn.src(0));
+ this->branchPos2.push_back(std::make_pair(label, p->store.size()));
+ p->JMPI(src);
+ }
+
+ void GenContext::emitEotInstruction(const SelectionInstruction &insn) {
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.execWidth = 8;
+ p->curr.noMask = 1;
+ p->EOT(0);
+ p->pop();
+ }
+
+ void GenContext::emitUntypedReadInstruction(const SelectionInstruction &insn) {
+ const GenRegister dst = ra->genReg(insn.dst(0));
+ const GenRegister src = ra->genReg(insn.src(0));
+ const uint32_t bti = insn.extra.function;
+ const uint32_t elemNum = insn.extra.elem;
+ p->UNTYPED_READ(dst, src, bti, elemNum);
+ }
+
+ void GenContext::emitUntypedWriteInstruction(const SelectionInstruction &insn) {
+ const GenRegister src = ra->genReg(insn.src(0));
+ const uint32_t bti = insn.extra.function;
+ const uint32_t elemNum = insn.extra.elem;
+ p->UNTYPED_WRITE(src, bti, elemNum);
+ }
+
+ void GenContext::emitByteGatherInstruction(const SelectionInstruction &insn) {
+ const GenRegister dst = ra->genReg(insn.dst(0));
+ const GenRegister src = ra->genReg(insn.src(0));
+ const uint32_t bti = insn.extra.function;
+ const uint32_t elemSize = insn.extra.elem;
+ p->BYTE_GATHER(dst, src, bti, elemSize);
+ }
+
+ void GenContext::emitByteScatterInstruction(const SelectionInstruction &insn) {
+ const GenRegister src = ra->genReg(insn.src(0));
+ const uint32_t bti = insn.extra.function;
+ const uint32_t elemSize = insn.extra.elem;
+ p->BYTE_SCATTER(src, bti, elemSize);
+ }
+
+ BVAR(OCL_OUTPUT_ASM, false);
+ bool GenContext::emitCode(void) {
+ GenKernel *genKernel = static_cast<GenKernel*>(this->kernel);
+ sel->select();
+ schedulePreRegAllocation(*this, *this->sel);
+ if (UNLIKELY(ra->allocate(*this->sel) == false))
+ return false;
+ schedulePostRegAllocation(*this, *this->sel);
+ this->emitStackPointer();
+ this->emitInstructionStream();
+ this->patchBranches();
+ genKernel->insnNum = p->store.size();
+ genKernel->insns = GBE_NEW_ARRAY_NO_ARG(GenInstruction, genKernel->insnNum);
+ std::memcpy(genKernel->insns, &p->store[0], genKernel->insnNum * sizeof(GenInstruction));
+ if (OCL_OUTPUT_ASM)
+ for (uint32_t insnID = 0; insnID < genKernel->insnNum; ++insnID)
+ gen_disasm(stdout, &p->store[insnID]);
+ return true;
+ }
+
+ Kernel *GenContext::allocateKernel(void) {
+ return GBE_NEW(GenKernel, name);
+ }
+
+} /* namespace gbe */
+
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
new file mode 100644
index 0000000..d0355fe
--- /dev/null
+++ b/backend/src/backend/gen_context.hpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file gen_context.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __GBE_GEN_CONTEXT_HPP__
+#define __GBE_GEN_CONTEXT_HPP__
+
+#include "backend/context.hpp"
+#include "backend/program.h"
+#include "ir/function.hpp"
+#include "ir/liveness.hpp"
+#include "sys/map.hpp"
+#include <string>
+
+namespace gbe
+{
+ class Kernel; // We build this structure
+ class GenEncoder; // Helps emitting Gen ISA
+ class GenRegAllocator; // Handle the register allocation
+ class Selection; // Performs instruction selection
+ class SelectionInstruction; // Pre-RA Gen instruction
+ class SelectionReg; // Pre-RA Gen register
+
+ /*! Context is the helper structure to build the Gen ISA or simulation code
+ * from GenIR
+ */
+ class GenContext : public Context
+ {
+ public:
+ /*! Create a new context. name is the name of the function we want to
+ * compile
+ */
+ GenContext(const ir::Unit &unit, const std::string &name, bool limitRegisterPressure = false);
+    /*! Release all allocated resources */
+ ~GenContext(void);
+ /*! Implements base class */
+ virtual bool emitCode(void);
+ /*! Function we emit code for */
+ INLINE const ir::Function &getFunction(void) const { return fn; }
+ /*! Simd width chosen for the current function */
+ INLINE uint32_t getSimdWidth(void) const { return simdWidth; }
+ /*! Emit the per-lane stack pointer computation */
+ void emitStackPointer(void);
+ /*! Emit the instructions */
+ void emitInstructionStream(void);
+ /*! Set the correct target values for the branches */
+ void patchBranches(void);
+ /*! Forward ir::Function isSpecialReg method */
+ INLINE bool isSpecialReg(ir::Register reg) const {
+ return fn.isSpecialReg(reg);
+ }
+ /*! Get the liveOut information for the given block */
+ INLINE const ir::Liveness::LiveOut &getLiveOut(const ir::BasicBlock *bb) const {
+ return this->liveness->getLiveOut(bb);
+ }
+ /*! Final Gen ISA emission helper functions */
+ void emitLabelInstruction(const SelectionInstruction &insn);
+ void emitUnaryInstruction(const SelectionInstruction &insn);
+ void emitBinaryInstruction(const SelectionInstruction &insn);
+ void emitTernaryInstruction(const SelectionInstruction &insn);
+ void emitCompareInstruction(const SelectionInstruction &insn);
+ void emitJumpInstruction(const SelectionInstruction &insn);
+ void emitEotInstruction(const SelectionInstruction &insn);
+ void emitNoOpInstruction(const SelectionInstruction &insn);
+ void emitWaitInstruction(const SelectionInstruction &insn);
+ void emitBarrierInstruction(const SelectionInstruction &insn);
+ void emitMathInstruction(const SelectionInstruction &insn);
+ void emitUntypedReadInstruction(const SelectionInstruction &insn);
+ void emitUntypedWriteInstruction(const SelectionInstruction &insn);
+ void emitByteGatherInstruction(const SelectionInstruction &insn);
+ void emitByteScatterInstruction(const SelectionInstruction &insn);
+
+ /*! Implements base class */
+ virtual Kernel *allocateKernel(void);
+ /*! Store the position of each label instruction in the Gen ISA stream */
+ map<ir::LabelIndex, uint32_t> labelPos;
+    /*! Positions of the branch instructions whose targets still need patching */
+ vector<std::pair<ir::LabelIndex, uint32_t>> branchPos2;
+ /*! Encode Gen ISA */
+ GenEncoder *p;
+ /*! Instruction selection on Gen ISA (pre-register allocation) */
+ Selection *sel;
+ /*! Perform the register allocation */
+ GenRegAllocator *ra;
+ /*! Indicate if we need to tackle a register pressure issue when
+ * regenerating the code
+ */
+ bool limitRegisterPressure;
+ };
+
+} /* namespace gbe */
+
+#endif /* __GBE_GEN_CONTEXT_HPP__ */
+
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
new file mode 100644
index 0000000..008a298
--- /dev/null
+++ b/backend/src/backend/gen_defs.hpp
@@ -0,0 +1,757 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith at tungstengraphics.com>
+ */
+
+#ifndef __GEN_DEFS_HPP__
+#define __GEN_DEFS_HPP__
+
+#include <stdint.h>
+
+/////////////////////////////////////////////////////////////////////////////
+// Gen EU defines
+/////////////////////////////////////////////////////////////////////////////
+
+/* Execution Unit (EU) defines */
+#define GEN_ALIGN_1 0
+#define GEN_ALIGN_16 1
+
+#define GEN_REG_SIZE 32
+
+#define GEN_ADDRESS_DIRECT 0
+#define GEN_ADDRESS_REGISTER_INDIRECT_REGISTER 1
+
+#define GEN_CHANNEL_X 0
+#define GEN_CHANNEL_Y 1
+#define GEN_CHANNEL_Z 2
+#define GEN_CHANNEL_W 3
+
+#define GEN_COMPRESSION_Q1 0
+#define GEN_COMPRESSION_Q2 1
+#define GEN_COMPRESSION_Q3 2
+#define GEN_COMPRESSION_Q4 3
+#define GEN_COMPRESSION_H1 0
+#define GEN_COMPRESSION_H2 2
+
+#define GEN_CONDITIONAL_NONE 0
+#define GEN_CONDITIONAL_Z 1
+#define GEN_CONDITIONAL_NZ 2
+#define GEN_CONDITIONAL_EQ 1 /* Z */
+#define GEN_CONDITIONAL_NEQ 2 /* NZ */
+#define GEN_CONDITIONAL_G 3
+#define GEN_CONDITIONAL_GE 4
+#define GEN_CONDITIONAL_L 5
+#define GEN_CONDITIONAL_LE 6
+#define GEN_CONDITIONAL_R 7
+#define GEN_CONDITIONAL_O 8
+#define GEN_CONDITIONAL_U 9
+
+#define GEN_DEBUG_NONE 0
+#define GEN_DEBUG_BREAKPOINT 1
+
+#define GEN_DEPENDENCY_NORMAL 0
+#define GEN_DEPENDENCY_NOTCLEARED 1
+#define GEN_DEPENDENCY_NOTCHECKED 2
+#define GEN_DEPENDENCY_DISABLE 3
+
+#define GEN_HORIZONTAL_STRIDE_0 0
+#define GEN_HORIZONTAL_STRIDE_1 1
+#define GEN_HORIZONTAL_STRIDE_2 2
+#define GEN_HORIZONTAL_STRIDE_4 3
+
+#define GEN_INSTRUCTION_NORMAL 0
+#define GEN_INSTRUCTION_SATURATE 1
+
+#define GEN_MASK_ENABLE 0
+#define GEN_MASK_DISABLE 1
+
+/*! Gen opcode */
+enum opcode {
+ GEN_OPCODE_MOV = 1,
+ GEN_OPCODE_SEL = 2,
+ GEN_OPCODE_NOT = 4,
+ GEN_OPCODE_AND = 5,
+ GEN_OPCODE_OR = 6,
+ GEN_OPCODE_XOR = 7,
+ GEN_OPCODE_SHR = 8,
+ GEN_OPCODE_SHL = 9,
+ GEN_OPCODE_RSR = 10,
+ GEN_OPCODE_RSL = 11,
+ GEN_OPCODE_ASR = 12,
+ GEN_OPCODE_CMP = 16,
+ GEN_OPCODE_CMPN = 17,
+ GEN_OPCODE_JMPI = 32,
+ GEN_OPCODE_IF = 34,
+ GEN_OPCODE_IFF = 35,
+ GEN_OPCODE_ELSE = 36,
+ GEN_OPCODE_ENDIF = 37,
+ GEN_OPCODE_DO = 38,
+ GEN_OPCODE_WHILE = 39,
+ GEN_OPCODE_BREAK = 40,
+ GEN_OPCODE_CONTINUE = 41,
+ GEN_OPCODE_HALT = 42,
+ GEN_OPCODE_MSAVE = 44,
+ GEN_OPCODE_MRESTORE = 45,
+ GEN_OPCODE_PUSH = 46,
+ GEN_OPCODE_POP = 47,
+ GEN_OPCODE_WAIT = 48,
+ GEN_OPCODE_SEND = 49,
+ GEN_OPCODE_SENDC = 50,
+ GEN_OPCODE_MATH = 56,
+ GEN_OPCODE_ADD = 64,
+ GEN_OPCODE_MUL = 65,
+ GEN_OPCODE_AVG = 66,
+ GEN_OPCODE_FRC = 67,
+ GEN_OPCODE_RNDU = 68,
+ GEN_OPCODE_RNDD = 69,
+ GEN_OPCODE_RNDE = 70,
+ GEN_OPCODE_RNDZ = 71,
+ GEN_OPCODE_MAC = 72,
+ GEN_OPCODE_MACH = 73,
+ GEN_OPCODE_LZD = 74,
+ GEN_OPCODE_SAD2 = 80,
+ GEN_OPCODE_SADA2 = 81,
+ GEN_OPCODE_DP4 = 84,
+ GEN_OPCODE_DPH = 85,
+ GEN_OPCODE_DP3 = 86,
+ GEN_OPCODE_DP2 = 87,
+ GEN_OPCODE_DPA2 = 88,
+ GEN_OPCODE_LINE = 89,
+ GEN_OPCODE_PLN = 90,
+ GEN_OPCODE_MAD = 91,
+ GEN_OPCODE_NOP = 126,
+};
+
+/*! Gen SFID */
+enum GenMessageTarget {
+ GEN_SFID_NULL = 0,
+ GEN_SFID_MATH = 1,
+ GEN_SFID_SAMPLER = 2,
+ GEN_SFID_MESSAGE_GATEWAY = 3,
+ GEN_SFID_DATAPORT_READ = 4,
+ GEN_SFID_DATAPORT_WRITE = 5,
+ GEN_SFID_URB = 6,
+ GEN_SFID_THREAD_SPAWNER = 7,
+ GEN6_SFID_DATAPORT_SAMPLER_CACHE = 4,
+ GEN6_SFID_DATAPORT_RENDER_CACHE = 5,
+ GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9,
+ GEN_SFID_DATAPORT_DATA_CACHE = 10,
+};
+
+#define GEN_PREDICATE_NONE 0
+#define GEN_PREDICATE_NORMAL 1
+#define GEN_PREDICATE_ALIGN1_ANYV 2
+#define GEN_PREDICATE_ALIGN1_ALLV 3
+#define GEN_PREDICATE_ALIGN1_ANY2H 4
+#define GEN_PREDICATE_ALIGN1_ALL2H 5
+#define GEN_PREDICATE_ALIGN1_ANY4H 6
+#define GEN_PREDICATE_ALIGN1_ALL4H 7
+#define GEN_PREDICATE_ALIGN1_ANY8H 8
+#define GEN_PREDICATE_ALIGN1_ALL8H 9
+#define GEN_PREDICATE_ALIGN1_ANY16H 10
+#define GEN_PREDICATE_ALIGN1_ALL16H 11
+#define GEN_PREDICATE_ALIGN16_REPLICATE_X 2
+#define GEN_PREDICATE_ALIGN16_REPLICATE_Y 3
+#define GEN_PREDICATE_ALIGN16_REPLICATE_Z 4
+#define GEN_PREDICATE_ALIGN16_REPLICATE_W 5
+#define GEN_PREDICATE_ALIGN16_ANY4H 6
+#define GEN_PREDICATE_ALIGN16_ALL4H 7
+
+#define GEN_ARCHITECTURE_REGISTER_FILE 0
+#define GEN_GENERAL_REGISTER_FILE 1
+#define GEN_IMMEDIATE_VALUE 3
+
+#define GEN_TYPE_UD 0
+#define GEN_TYPE_D 1
+#define GEN_TYPE_UW 2
+#define GEN_TYPE_W 3
+#define GEN_TYPE_UB 4
+#define GEN_TYPE_B 5
+#define GEN_TYPE_VF 5 /* packed float vector, immediates only? */
+#define GEN_TYPE_HF 6
+#define GEN_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
+#define GEN_TYPE_F 7
+
+#define GEN_ARF_NULL 0x00
+#define GEN_ARF_ADDRESS 0x10
+#define GEN_ARF_ACCUMULATOR 0x20
+#define GEN_ARF_FLAG 0x30
+#define GEN_ARF_MASK 0x40
+#define GEN_ARF_MASK_STACK 0x50
+#define GEN_ARF_MASK_STACK_DEPTH 0x60
+#define GEN_ARF_STATE 0x70
+#define GEN_ARF_CONTROL 0x80
+#define GEN_ARF_NOTIFICATION_COUNT 0x90
+#define GEN_ARF_IP 0xA0
+
+#define GEN_MRF_COMPR4 (1 << 7)
+
+#define GEN_AMASK 0
+#define GEN_IMASK 1
+#define GEN_LMASK 2
+#define GEN_CMASK 3
+
+#define GEN_THREAD_NORMAL 0
+#define GEN_THREAD_ATOMIC 1
+#define GEN_THREAD_SWITCH 2
+
+#define GEN_VERTICAL_STRIDE_0 0
+#define GEN_VERTICAL_STRIDE_1 1
+#define GEN_VERTICAL_STRIDE_2 2
+#define GEN_VERTICAL_STRIDE_4 3
+#define GEN_VERTICAL_STRIDE_8 4
+#define GEN_VERTICAL_STRIDE_16 5
+#define GEN_VERTICAL_STRIDE_32 6
+#define GEN_VERTICAL_STRIDE_64 7
+#define GEN_VERTICAL_STRIDE_128 8
+#define GEN_VERTICAL_STRIDE_256 9
+#define GEN_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
+
+/* Execution width */
+#define GEN_WIDTH_1 0
+#define GEN_WIDTH_2 1
+#define GEN_WIDTH_4 2
+#define GEN_WIDTH_8 3
+#define GEN_WIDTH_16 4
+#define GEN_WIDTH_32 5
+
+/* Channels to enable for the untyped reads and writes */
+#define GEN_UNTYPED_RED (1 << 0)
+#define GEN_UNTYPED_GREEN (1 << 1)
+#define GEN_UNTYPED_BLUE (1 << 2)
+#define GEN_UNTYPED_ALPHA (1 << 3)
+
+/* SIMD mode for untyped reads and writes */
+#define GEN_UNTYPED_SIMD4x2 0
+#define GEN_UNTYPED_SIMD16 1
+#define GEN_UNTYPED_SIMD8 2
+
+/* SIMD mode for byte scatters / gathers */
+#define GEN_BYTE_SCATTER_SIMD8 0
+#define GEN_BYTE_SCATTER_SIMD16 1
+
+/* Data port message type */
+#define GEN_UNTYPED_READ 5
+#define GEN_UNTYPED_WRITE 13
+#define GEN_BYTE_GATHER 4
+#define GEN_BYTE_SCATTER 12
+#define GEN_OBLOCK_READ 0
+#define GEN_OBLOCK_WRITE 8
+
+/* For byte scatters and gathers, the element to write */
+#define GEN_BYTE_SCATTER_BYTE 0
+#define GEN_BYTE_SCATTER_WORD 1
+#define GEN_BYTE_SCATTER_DWORD 2
+
+#define GEN_SAMPLER_RETURN_FORMAT_FLOAT32 0
+#define GEN_SAMPLER_RETURN_FORMAT_UINT32 2
+#define GEN_SAMPLER_RETURN_FORMAT_SINT32 3
+
+#define GEN_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
+#define GEN_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
+#define GEN_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
+#define GEN_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
+#define GEN_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
+#define GEN_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
+#define GEN_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
+#define GEN_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
+#define GEN_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
+#define GEN_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
+#define GEN_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
+#define GEN_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE 1
+#define GEN_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1
+#define GEN_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
+#define GEN_SAMPLER_MESSAGE_SIMD16_RESINFO 2
+#define GEN_SAMPLER_MESSAGE_SIMD4X2_LD 3
+#define GEN_SAMPLER_MESSAGE_SIMD8_LD 3
+#define GEN_SAMPLER_MESSAGE_SIMD16_LD 3
+
+#define GEN5_SAMPLER_MESSAGE_SAMPLE 0
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS 1
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD 2
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE 3
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD 7
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10
+
+/* for GEN5 only */
+#define GEN_SAMPLER_SIMD_MODE_SIMD4X2 0
+#define GEN_SAMPLER_SIMD_MODE_SIMD8 1
+#define GEN_SAMPLER_SIMD_MODE_SIMD16 2
+#define GEN_SAMPLER_SIMD_MODE_SIMD32_64 3
+
+#define GEN_MATH_FUNCTION_INV 1
+#define GEN_MATH_FUNCTION_LOG 2
+#define GEN_MATH_FUNCTION_EXP 3
+#define GEN_MATH_FUNCTION_SQRT 4
+#define GEN_MATH_FUNCTION_RSQ 5
+#define GEN_MATH_FUNCTION_SIN 6 /* was 7 */
+#define GEN_MATH_FUNCTION_COS 7 /* was 8 */
+#define GEN_MATH_FUNCTION_SINCOS 8 /* was 6 */
+#define GEN_MATH_FUNCTION_TAN 9 /* gen4 */
+#define GEN_MATH_FUNCTION_FDIV 9 /* gen6+ */
+#define GEN_MATH_FUNCTION_POW 10
+#define GEN_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
+#define GEN_MATH_FUNCTION_INT_DIV_QUOTIENT 12
+#define GEN_MATH_FUNCTION_INT_DIV_REMAINDER 13
+
+#define GEN_MATH_INTEGER_UNSIGNED 0
+#define GEN_MATH_INTEGER_SIGNED 1
+
+#define GEN_MATH_PRECISION_FULL 0
+#define GEN_MATH_PRECISION_PARTIAL 1
+
+#define GEN_MATH_SATURATE_NONE 0
+#define GEN_MATH_SATURATE_SATURATE 1
+
+#define GEN_MATH_DATA_VECTOR 0
+#define GEN_MATH_DATA_SCALAR 1
+
+#define GEN_DEREFERENCE_URB 0
+#define GEN_DO_NOT_DEREFERENCE_URB 1
+
+#define GEN_MAX_NUM_BUFFER_ENTRIES (1 << 27)
+
+/* Message gateway */
+#define GEN_OPEN_GATEWAY 0b000
+#define GEN_CLOSE_GATEWAY 0b001
+#define GEN_FORWARD_MSG 0b010
+#define GEN_GET_TIME_STAMP 0b011
+#define GEN_BARRIER_MSG 0b100
+#define GEN_UPDATE_GATEWAY_STATE 0b101
+#define GEN_MMIO_READ_WRITE 0b110
+
+/////////////////////////////////////////////////////////////////////////////
+// Gen EU structures
+/////////////////////////////////////////////////////////////////////////////
+
+/** Number of general purpose registers (VS, WM, etc) */
+#define GEN_MAX_GRF 128
+
+/* Instruction format for the execution units */
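+/* Each native Gen instruction is 128 bits wide. It is modeled here as a fixed
+ * 32-bit header followed by three 32-bit unions (bits1, bits2, bits3) whose
+ * layout depends on the addressing mode and, for send messages, on the target
+ * shared function
+ */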
+struct GenInstruction
+{
+ struct {
+ uint32_t opcode:7;
+ uint32_t pad:1;
+ uint32_t access_mode:1;
+ uint32_t mask_control:1;
+ uint32_t dependency_control:2;
+ uint32_t quarter_control:2;
+ uint32_t thread_control:2;
+ uint32_t predicate_control:4;
+ uint32_t predicate_inverse:1;
+ uint32_t execution_size:3;
+ uint32_t destreg_or_condmod:4;
+ uint32_t acc_wr_control:1;
+ uint32_t cmpt_control:1;
+ uint32_t debug_control:1;
+ uint32_t saturate:1;
+ } header;
+
+ union {
+ struct {
+ uint32_t dest_reg_file:2;
+ uint32_t dest_reg_type:3;
+ uint32_t src0_reg_file:2;
+ uint32_t src0_reg_type:3;
+ uint32_t src1_reg_file:2;
+ uint32_t src1_reg_type:3;
+ uint32_t pad:1;
+ uint32_t dest_subreg_nr:5;
+ uint32_t dest_reg_nr:8;
+ uint32_t dest_horiz_stride:2;
+ uint32_t dest_address_mode:1;
+ } da1;
+
+ struct {
+ uint32_t dest_reg_file:2;
+ uint32_t dest_reg_type:3;
+ uint32_t src0_reg_file:2;
+ uint32_t src0_reg_type:3;
+ uint32_t src1_reg_file:2; /* 0x00000c00 */
+ uint32_t src1_reg_type:3; /* 0x00007000 */
+ uint32_t pad:1;
+ int dest_indirect_offset:10; /* offset against the deref'd address reg */
+ uint32_t dest_subreg_nr:3; /* subnr for the address reg a0.x */
+ uint32_t dest_horiz_stride:2;
+ uint32_t dest_address_mode:1;
+ } ia1;
+
+ struct {
+ uint32_t dest_reg_file:2;
+ uint32_t dest_reg_type:3;
+ uint32_t src0_reg_file:2;
+ uint32_t src0_reg_type:3;
+ uint32_t src1_reg_file:2;
+ uint32_t src1_reg_type:3;
+ uint32_t pad:1;
+ uint32_t dest_writemask:4;
+ uint32_t dest_subreg_nr:1;
+ uint32_t dest_reg_nr:8;
+ uint32_t dest_horiz_stride:2;
+ uint32_t dest_address_mode:1;
+ } da16;
+
+ struct {
+ uint32_t dest_reg_file:2;
+ uint32_t dest_reg_type:3;
+ uint32_t src0_reg_file:2;
+ uint32_t src0_reg_type:3;
+ uint32_t pad0:6;
+ uint32_t dest_writemask:4;
+ int dest_indirect_offset:6;
+ uint32_t dest_subreg_nr:3;
+ uint32_t dest_horiz_stride:2;
+ uint32_t dest_address_mode:1;
+ } ia16;
+
+ struct {
+ uint32_t dest_reg_file:2;
+ uint32_t dest_reg_type:3;
+ uint32_t src0_reg_file:2;
+ uint32_t src0_reg_type:3;
+ uint32_t src1_reg_file:2;
+ uint32_t src1_reg_type:3;
+ uint32_t pad:1;
+ int jump_count:16;
+ } branch_gen6;
+
+ struct {
+ uint32_t dest_reg_file:1;
+ uint32_t flag_subreg_num:1;
+ uint32_t pad0:2;
+ uint32_t src0_abs:1;
+ uint32_t src0_negate:1;
+ uint32_t src1_abs:1;
+ uint32_t src1_negate:1;
+ uint32_t src2_abs:1;
+ uint32_t src2_negate:1;
+ uint32_t pad1:7;
+ uint32_t dest_writemask:4;
+ uint32_t dest_subreg_nr:3;
+ uint32_t dest_reg_nr:8;
+ } da3src;
+ } bits1;
+
+ union {
+ struct {
+ uint32_t src0_subreg_nr:5;
+ uint32_t src0_reg_nr:8;
+ uint32_t src0_abs:1;
+ uint32_t src0_negate:1;
+ uint32_t src0_address_mode:1;
+ uint32_t src0_horiz_stride:2;
+ uint32_t src0_width:3;
+ uint32_t src0_vert_stride:4;
+ uint32_t flag_sub_reg_nr:1;
+ uint32_t flag_reg_nr:1;
+ uint32_t pad:5;
+ } da1;
+
+ struct {
+ int src0_indirect_offset:10;
+ uint32_t src0_subreg_nr:3;
+ uint32_t src0_abs:1;
+ uint32_t src0_negate:1;
+ uint32_t src0_address_mode:1;
+ uint32_t src0_horiz_stride:2;
+ uint32_t src0_width:3;
+ uint32_t src0_vert_stride:4;
+ uint32_t flag_sub_reg_nr:1;
+ uint32_t flag_reg_nr:1;
+ uint32_t pad:5;
+ } ia1;
+
+ struct {
+ uint32_t src0_swz_x:2;
+ uint32_t src0_swz_y:2;
+ uint32_t src0_subreg_nr:1;
+ uint32_t src0_reg_nr:8;
+ uint32_t src0_abs:1;
+ uint32_t src0_negate:1;
+ uint32_t src0_address_mode:1;
+ uint32_t src0_swz_z:2;
+ uint32_t src0_swz_w:2;
+ uint32_t pad0:1;
+ uint32_t src0_vert_stride:4;
+ uint32_t flag_sub_reg_nr:1;
+ uint32_t flag_reg_nr:1;
+ uint32_t pad:5;
+ } da16;
+
+ struct {
+ uint32_t src0_swz_x:2;
+ uint32_t src0_swz_y:2;
+ int src0_indirect_offset:6;
+ uint32_t src0_subreg_nr:3;
+ uint32_t src0_abs:1;
+ uint32_t src0_negate:1;
+ uint32_t src0_address_mode:1;
+ uint32_t src0_swz_z:2;
+ uint32_t src0_swz_w:2;
+ uint32_t pad0:1;
+ uint32_t src0_vert_stride:4;
+ uint32_t flag_sub_reg_nr:1;
+ uint32_t flag_reg_nr:1;
+ uint32_t pad:5;
+ } ia16;
+
+ struct {
+ uint32_t src0_rep_ctrl:1;
+ uint32_t src0_swizzle:8;
+ uint32_t src0_subreg_nr:3;
+ uint32_t src0_reg_nr:8;
+ uint32_t pad0:1;
+ uint32_t src1_rep_ctrl:1;
+ uint32_t src1_swizzle:8;
+ uint32_t src1_subreg_nr_low:2;
+ } da3src;
+ } bits2;
+
+ union {
+ struct {
+ uint32_t src1_subreg_nr:5;
+ uint32_t src1_reg_nr:8;
+ uint32_t src1_abs:1;
+ uint32_t src1_negate:1;
+ uint32_t src1_address_mode:1;
+ uint32_t src1_horiz_stride:2;
+ uint32_t src1_width:3;
+ uint32_t src1_vert_stride:4;
+ uint32_t pad0:7;
+ } da1;
+
+ struct {
+ uint32_t src1_swz_x:2;
+ uint32_t src1_swz_y:2;
+ uint32_t src1_subreg_nr:1;
+ uint32_t src1_reg_nr:8;
+ uint32_t src1_abs:1;
+ uint32_t src1_negate:1;
+ uint32_t src1_address_mode:1;
+ uint32_t src1_swz_z:2;
+ uint32_t src1_swz_w:2;
+ uint32_t pad1:1;
+ uint32_t src1_vert_stride:4;
+ uint32_t pad2:7;
+ } da16;
+
+ struct {
+ int src1_indirect_offset:10;
+ uint32_t src1_subreg_nr:3;
+ uint32_t src1_abs:1;
+ uint32_t src1_negate:1;
+ uint32_t src1_address_mode:1;
+ uint32_t src1_horiz_stride:2;
+ uint32_t src1_width:3;
+ uint32_t src1_vert_stride:4;
+ uint32_t pad1:7;
+ } ia1;
+
+ struct {
+ uint32_t src1_swz_x:2;
+ uint32_t src1_swz_y:2;
+ int src1_indirect_offset:6;
+ uint32_t src1_subreg_nr:3;
+ uint32_t src1_abs:1;
+ uint32_t src1_negate:1;
+ uint32_t pad0:1;
+ uint32_t src1_swz_z:2;
+ uint32_t src1_swz_w:2;
+ uint32_t pad1:1;
+ uint32_t src1_vert_stride:4;
+ uint32_t pad2:7;
+ } ia16;
+
+ struct {
+ uint32_t function_control:19;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad1:2;
+ uint32_t end_of_thread:1;
+ } generic_gen5;
+
+ struct {
+ uint32_t sub_function_id:3;
+ uint32_t pad0:11;
+ uint32_t ack_req:1;
+ uint32_t notify:2;
+ uint32_t pad1:2;
+ uint32_t header:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad2:2;
+ uint32_t end_of_thread:1;
+ } msg_gateway;
+
+ struct {
+ uint32_t opcode:1;
+ uint32_t request:1;
+ uint32_t pad0:2;
+ uint32_t resource:1;
+ uint32_t pad1:14;
+ uint32_t header:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad2:2;
+ uint32_t end_of_thread:1;
+ } spawner_gen5;
+
+ /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
+ struct {
+ uint32_t function:4;
+ uint32_t int_type:1;
+ uint32_t precision:1;
+ uint32_t saturate:1;
+ uint32_t data_type:1;
+ uint32_t snapshot:1;
+ uint32_t pad0:10;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad1:2;
+ uint32_t end_of_thread:1;
+ } math_gen5;
+
+ struct {
+ uint32_t bti:8;
+ uint32_t sampler:4;
+ uint32_t msg_type:5;
+ uint32_t simd_mode:2;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad1:2;
+ uint32_t end_of_thread:1;
+ } sampler_gen7;
+
+ /**
+ * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
+ *
+ * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
+ **/
+ struct {
+ uint32_t bti:8;
+ uint32_t msg_control:5;
+ uint32_t msg_type:3;
+ uint32_t pad0:3;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad1:2;
+ uint32_t end_of_thread:1;
+ } gen6_dp_sampler_const_cache;
+
+ /*! Data port untyped read / write messages */
+ struct {
+ uint32_t bti:8;
+ uint32_t rgba:4;
+ uint32_t simd_mode:2;
+ uint32_t msg_type:4;
+ uint32_t category:1;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad2:2;
+ uint32_t end_of_thread:1;
+ } gen7_untyped_rw;
+
+ /*! Data port byte scatter / gather */
+ struct {
+ uint32_t bti:8;
+ uint32_t simd_mode:1;
+ uint32_t ignored0:1;
+ uint32_t data_size:2;
+ uint32_t ignored1:2;
+ uint32_t msg_type:4;
+ uint32_t category:1;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad2:2;
+ uint32_t end_of_thread:1;
+ } gen7_byte_rw;
+
+ /*! Data port OBlock read / write */
+ struct {
+ uint32_t bti:8;
+ uint32_t block_size:3;
+ uint32_t ignored:2;
+ uint32_t invalidate_after_read:1;
+ uint32_t msg_type:4;
+ uint32_t category:1;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad2:2;
+ uint32_t end_of_thread:1;
+ } gen7_oblock_rw;
+
+ struct {
+ uint32_t src1_subreg_nr_high:1;
+ uint32_t src1_reg_nr:8;
+ uint32_t pad0:1;
+ uint32_t src2_rep_ctrl:1;
+ uint32_t src2_swizzle:8;
+ uint32_t src2_subreg_nr:3;
+ uint32_t src2_reg_nr:8;
+ uint32_t pad1:2;
+ } da3src;
+
+ int d;
+ uint32_t ud;
+ float f;
+ } bits3;
+};
+
+#endif /* __GEN_DEFS_HPP__ */
+
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
new file mode 100644
index 0000000..f0138f8
--- /dev/null
+++ b/backend/src/backend/gen_encoder.cpp
@@ -0,0 +1,846 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith at tungstengraphics.com>
+ */
+
+#include "backend/gen_encoder.hpp"
+#include <cstring>
+
+namespace gbe
+{
+ //////////////////////////////////////////////////////////////////////////
+ // Some helper functions to encode
+ //////////////////////////////////////////////////////////////////////////
+ INLINE bool isVectorOfBytes(GenRegister reg) {
+ if (reg.hstride != GEN_HORIZONTAL_STRIDE_0 &&
+ (reg.type == GEN_TYPE_UB || reg.type == GEN_TYPE_B))
+ return true;
+ else
+ return false;
+ }
+
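+  // The helpers below detect the SIMD16 cases that the emitters split into two
+  // SIMD8 halves (Q1/Q2): byte-vector operands for ALU instructions and,
+  // additionally, dword/float operands for CMP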
+ INLINE bool needToSplitAlu1(GenEncoder *p, GenRegister dst, GenRegister src) {
+ if (p->curr.execWidth != 16) return false;
+ if (isVectorOfBytes(dst) == true) return true;
+ if (isVectorOfBytes(src) == true) return true;
+ return false;
+ }
+
+ INLINE bool needToSplitAlu2(GenEncoder *p, GenRegister dst, GenRegister src0, GenRegister src1) {
+ if (p->curr.execWidth != 16) return false;
+ if (isVectorOfBytes(dst) == true) return true;
+ if (isVectorOfBytes(src0) == true) return true;
+ if (isVectorOfBytes(src1) == true) return true;
+ return false;
+ }
+
+ INLINE bool needToSplitCmp(GenEncoder *p, GenRegister src0, GenRegister src1) {
+ if (p->curr.execWidth != 16) return false;
+ if (isVectorOfBytes(src0) == true) return true;
+ if (isVectorOfBytes(src1) == true) return true;
+ if (src0.type == GEN_TYPE_D || src0.type == GEN_TYPE_UD || src0.type == GEN_TYPE_F)
+ return true;
+ if (src1.type == GEN_TYPE_D || src1.type == GEN_TYPE_UD || src1.type == GEN_TYPE_F)
+ return true;
+ return false;
+ }
+
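+  // For SEND instructions, the message descriptor is encoded in the src1 slot
+  // (bits3) while the shared function ID (SFID) selecting the target unit goes
+  // into the destreg/condmod field of the header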
+ static void setMessageDescriptor(GenEncoder *p,
+ GenInstruction *inst,
+ enum GenMessageTarget sfid,
+ unsigned msg_length,
+ unsigned response_length,
+ bool header_present = false,
+ bool end_of_thread = false)
+ {
+ p->setSrc1(inst, GenRegister::immd(0));
+ inst->bits3.generic_gen5.header_present = header_present;
+ inst->bits3.generic_gen5.response_length = response_length;
+ inst->bits3.generic_gen5.msg_length = msg_length;
+ inst->bits3.generic_gen5.end_of_thread = end_of_thread;
+ inst->header.destreg_or_condmod = sfid;
+ }
+
+ static void setDPUntypedRW(GenEncoder *p,
+ GenInstruction *insn,
+ uint32_t bti,
+ uint32_t rgba,
+ uint32_t msg_type,
+ uint32_t msg_length,
+ uint32_t response_length)
+ {
+ const GenMessageTarget sfid = GEN_SFID_DATAPORT_DATA_CACHE;
+ setMessageDescriptor(p, insn, sfid, msg_length, response_length);
+ insn->bits3.gen7_untyped_rw.msg_type = msg_type;
+ insn->bits3.gen7_untyped_rw.bti = bti;
+ insn->bits3.gen7_untyped_rw.rgba = rgba;
+ if (p->curr.execWidth == 8)
+ insn->bits3.gen7_untyped_rw.simd_mode = GEN_UNTYPED_SIMD8;
+ else if (p->curr.execWidth == 16)
+ insn->bits3.gen7_untyped_rw.simd_mode = GEN_UNTYPED_SIMD16;
+ else
+ NOT_SUPPORTED;
+ }
+
+ static void setDPByteScatterGather(GenEncoder *p,
+ GenInstruction *insn,
+ uint32_t bti,
+ uint32_t elem_size,
+ uint32_t msg_type,
+ uint32_t msg_length,
+ uint32_t response_length)
+ {
+ const GenMessageTarget sfid = GEN_SFID_DATAPORT_DATA_CACHE;
+ setMessageDescriptor(p, insn, sfid, msg_length, response_length);
+ insn->bits3.gen7_byte_rw.msg_type = msg_type;
+ insn->bits3.gen7_byte_rw.bti = bti;
+ insn->bits3.gen7_byte_rw.data_size = elem_size;
+ if (p->curr.execWidth == 8)
+ insn->bits3.gen7_byte_rw.simd_mode = GEN_BYTE_SCATTER_SIMD8;
+ else if (p->curr.execWidth == 16)
+ insn->bits3.gen7_byte_rw.simd_mode = GEN_BYTE_SCATTER_SIMD16;
+ else
+ NOT_SUPPORTED;
+ }
+#if 0
+ static void setOBlockRW(GenEncoder *p,
+ GenInstruction *insn,
+ uint32_t bti,
+ uint32_t size,
+ uint32_t msg_type,
+ uint32_t msg_length,
+ uint32_t response_length)
+ {
+ const GenMessageTarget sfid = GEN_SFID_DATAPORT_DATA_CACHE;
+ setMessageDescriptor(p, insn, sfid, msg_length, response_length);
+ assert(size == 2 || size == 4);
+ insn->bits3.gen7_oblock_rw.msg_type = msg_type;
+ insn->bits3.gen7_oblock_rw.bti = bti;
+ insn->bits3.gen7_oblock_rw.block_size = size == 2 ? 2 : 3;
+ insn->bits3.gen7_oblock_rw.header_present = 1;
+ }
+#endif
+
+ static void setSamplerMessage(GenEncoder *p,
+ GenInstruction *insn,
+ uint32_t bti,
+ uint32_t sampler,
+ uint32_t msg_type,
+ uint32_t response_length,
+ uint32_t msg_length,
+ uint32_t header,
+ uint32_t simd_mode,
+ uint32_t return_format)
+ {
+ const GenMessageTarget sfid = GEN_SFID_SAMPLER;
+ setMessageDescriptor(p, insn, sfid, msg_length, response_length, header);
+ insn->bits3.sampler_gen7.bti = bti;
+ insn->bits3.sampler_gen7.sampler = sampler;
+ insn->bits3.sampler_gen7.msg_type = msg_type;
+ insn->bits3.sampler_gen7.simd_mode = simd_mode;
+ }
+
+ //////////////////////////////////////////////////////////////////////////
+ // Gen Emitter encoding class
+ //////////////////////////////////////////////////////////////////////////
+ GenEncoder::GenEncoder(uint32_t simdWidth, uint32_t gen) :
+ stateNum(0), gen(gen)
+ {
+ this->curr.execWidth = simdWidth;
+ this->curr.quarterControl = GEN_COMPRESSION_Q1;
+ this->curr.noMask = 0;
+ this->curr.flag = 0;
+ this->curr.subFlag = 0;
+ this->curr.predicate = GEN_PREDICATE_NORMAL;
+ this->curr.inversePredicate = 0;
+ }
+
+ void GenEncoder::push(void) {
+ assert(stateNum < MAX_STATE_NUM);
+ stack[stateNum++] = curr;
+ }
+
+ void GenEncoder::pop(void) {
+ assert(stateNum > 0);
+ curr = stack[--stateNum];
+ }
+
+ void GenEncoder::setHeader(GenInstruction *insn) {
+ if (this->curr.execWidth == 8)
+ insn->header.execution_size = GEN_WIDTH_8;
+ else if (this->curr.execWidth == 16)
+ insn->header.execution_size = GEN_WIDTH_16;
+ else if (this->curr.execWidth == 1)
+ insn->header.execution_size = GEN_WIDTH_1;
+ else
+ NOT_IMPLEMENTED;
+ insn->header.acc_wr_control = this->curr.accWrEnable;
+ insn->header.quarter_control = this->curr.quarterControl;
+ insn->header.mask_control = this->curr.noMask;
+ insn->bits2.ia1.flag_reg_nr = this->curr.flag;
+ insn->bits2.ia1.flag_sub_reg_nr = this->curr.subFlag;
+ if (this->curr.predicate != GEN_PREDICATE_NONE) {
+ insn->header.predicate_control = this->curr.predicate;
+ insn->header.predicate_inverse = this->curr.inversePredicate;
+ }
+ }
+
+ void GenEncoder::setDst(GenInstruction *insn, GenRegister dest) {
+ if (dest.file != GEN_ARCHITECTURE_REGISTER_FILE)
+ assert(dest.nr < 128);
+
+ insn->bits1.da1.dest_reg_file = dest.file;
+ insn->bits1.da1.dest_reg_type = dest.type;
+ insn->bits1.da1.dest_address_mode = dest.address_mode;
+ insn->bits1.da1.dest_reg_nr = dest.nr;
+ insn->bits1.da1.dest_subreg_nr = dest.subnr;
+ if (dest.hstride == GEN_HORIZONTAL_STRIDE_0)
+ dest.hstride = GEN_HORIZONTAL_STRIDE_1;
+ insn->bits1.da1.dest_horiz_stride = dest.hstride;
+ }
+
+ void GenEncoder::setSrc0(GenInstruction *insn, GenRegister reg) {
+ if (reg.file != GEN_ARCHITECTURE_REGISTER_FILE)
+ assert(reg.nr < 128);
+
+ if (reg.address_mode == GEN_ADDRESS_DIRECT) {
+ insn->bits1.da1.src0_reg_file = reg.file;
+ insn->bits1.da1.src0_reg_type = reg.type;
+ insn->bits2.da1.src0_abs = reg.absolute;
+ insn->bits2.da1.src0_negate = reg.negation;
+ insn->bits2.da1.src0_address_mode = reg.address_mode;
+
+ if (reg.file == GEN_IMMEDIATE_VALUE) {
+ insn->bits3.ud = reg.value.ud;
+
+ /* Required to set some fields in src1 as well: */
+ insn->bits1.da1.src1_reg_file = 0; /* arf */
+ insn->bits1.da1.src1_reg_type = reg.type;
+ }
+ else {
+ if (insn->header.access_mode == GEN_ALIGN_1) {
+ insn->bits2.da1.src0_subreg_nr = reg.subnr;
+ insn->bits2.da1.src0_reg_nr = reg.nr;
+ } else {
+ insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
+ insn->bits2.da16.src0_reg_nr = reg.nr;
+ }
+
+ if (reg.width == GEN_WIDTH_1 &&
+ insn->header.execution_size == GEN_WIDTH_1) {
+ insn->bits2.da1.src0_horiz_stride = GEN_HORIZONTAL_STRIDE_0;
+ insn->bits2.da1.src0_width = GEN_WIDTH_1;
+ insn->bits2.da1.src0_vert_stride = GEN_VERTICAL_STRIDE_0;
+ }
+ else {
+ insn->bits2.da1.src0_horiz_stride = reg.hstride;
+ insn->bits2.da1.src0_width = reg.width;
+ insn->bits2.da1.src0_vert_stride = reg.vstride;
+ }
+ }
+ } else {
+ insn->bits1.ia1.src0_reg_file = GEN_GENERAL_REGISTER_FILE;
+ insn->bits1.ia1.src0_reg_type = reg.type;
+ insn->bits2.ia1.src0_subreg_nr = 0;
+ insn->bits2.ia1.src0_indirect_offset = 0;
+ insn->bits2.ia1.src0_abs = 0;
+ insn->bits2.ia1.src0_negate = 0;
+ insn->bits2.ia1.src0_address_mode = reg.address_mode;
+ insn->bits2.ia1.src0_horiz_stride = GEN_HORIZONTAL_STRIDE_0;
+ insn->bits2.ia1.src0_width = GEN_WIDTH_1;
+ insn->bits2.ia1.src0_vert_stride = GEN_VERTICAL_STRIDE_ONE_DIMENSIONAL;
+ }
+ }
+
+ void GenEncoder::setSrc1(GenInstruction *insn, GenRegister reg) {
+ assert(reg.nr < 128);
+
+ insn->bits1.da1.src1_reg_file = reg.file;
+ insn->bits1.da1.src1_reg_type = reg.type;
+ insn->bits3.da1.src1_abs = reg.absolute;
+ insn->bits3.da1.src1_negate = reg.negation;
+
+ assert(insn->bits1.da1.src0_reg_file != GEN_IMMEDIATE_VALUE);
+
+ if (reg.file == GEN_IMMEDIATE_VALUE)
+ insn->bits3.ud = reg.value.ud;
+ else {
+ assert (reg.address_mode == GEN_ADDRESS_DIRECT);
+ if (insn->header.access_mode == GEN_ALIGN_1) {
+ insn->bits3.da1.src1_subreg_nr = reg.subnr;
+ insn->bits3.da1.src1_reg_nr = reg.nr;
+ } else {
+ insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
+ insn->bits3.da16.src1_reg_nr = reg.nr;
+ }
+
+ if (reg.width == GEN_WIDTH_1 &&
+ insn->header.execution_size == GEN_WIDTH_1) {
+ insn->bits3.da1.src1_horiz_stride = GEN_HORIZONTAL_STRIDE_0;
+ insn->bits3.da1.src1_width = GEN_WIDTH_1;
+ insn->bits3.da1.src1_vert_stride = GEN_VERTICAL_STRIDE_0;
+ } else {
+ insn->bits3.da1.src1_horiz_stride = reg.hstride;
+ insn->bits3.da1.src1_width = reg.width;
+ insn->bits3.da1.src1_vert_stride = reg.vstride;
+ }
+ }
+ }
+
+ static const uint32_t untypedRWMask[] = {
+ GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN|GEN_UNTYPED_RED,
+ GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN,
+ GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE,
+ GEN_UNTYPED_ALPHA,
+ 0
+ };
+
+ void GenEncoder::UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum) {
+ GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+    assert(elemNum >= 1 && elemNum <= 4);
+ uint32_t msg_length = 0;
+ uint32_t response_length = 0;
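+    // Lengths are counted in 256-bit GRFs: one register of dword addresses per
+    // 8 lanes and elemNum registers of return data per 8 lanes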
+ if (this->curr.execWidth == 8) {
+ msg_length = 1;
+ response_length = elemNum;
+ } else if (this->curr.execWidth == 16) {
+ msg_length = 2;
+ response_length = 2*elemNum;
+ } else
+ NOT_IMPLEMENTED;
+
+ this->setHeader(insn);
+ this->setDst(insn, GenRegister::uw16grf(dst.nr, 0));
+ this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0));
+ this->setSrc1(insn, GenRegister::immud(0));
+ setDPUntypedRW(this,
+ insn,
+ bti,
+ untypedRWMask[elemNum],
+ GEN_UNTYPED_READ,
+ msg_length,
+ response_length);
+ }
+
+ void GenEncoder::UNTYPED_WRITE(GenRegister msg, uint32_t bti, uint32_t elemNum) {
+ GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+    assert(elemNum >= 1 && elemNum <= 4);
+ uint32_t msg_length = 0;
+ uint32_t response_length = 0;
+ this->setHeader(insn);
+ if (this->curr.execWidth == 8) {
+ this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
+ msg_length = 1+elemNum;
+ } else if (this->curr.execWidth == 16) {
+ this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
+ msg_length = 2*(1+elemNum);
+    } else
+ NOT_IMPLEMENTED;
+ this->setSrc0(insn, GenRegister::ud8grf(msg.nr, 0));
+ this->setSrc1(insn, GenRegister::immud(0));
+ setDPUntypedRW(this,
+ insn,
+ bti,
+ untypedRWMask[elemNum],
+ GEN_UNTYPED_WRITE,
+ msg_length,
+ response_length);
+ }
+
+ void GenEncoder::BYTE_GATHER(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemSize) {
+ GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ uint32_t msg_length = 0;
+ uint32_t response_length = 0;
+ if (this->curr.execWidth == 8) {
+ msg_length = 1;
+ response_length = 1;
+ } else if (this->curr.execWidth == 16) {
+ msg_length = 2;
+ response_length = 2;
+ } else
+ NOT_IMPLEMENTED;
+
+ this->setHeader(insn);
+ this->setDst(insn, GenRegister::uw16grf(dst.nr, 0));
+ this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0));
+ this->setSrc1(insn, GenRegister::immud(0));
+ setDPByteScatterGather(this,
+ insn,
+ bti,
+ elemSize,
+ GEN_BYTE_GATHER,
+ msg_length,
+ response_length);
+ }
+
+ void GenEncoder::BYTE_SCATTER(GenRegister msg, uint32_t bti, uint32_t elemSize) {
+ GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ uint32_t msg_length = 0;
+ uint32_t response_length = 0;
+ this->setHeader(insn);
+ if (this->curr.execWidth == 8) {
+ this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
+ msg_length = 2;
+ } else if (this->curr.execWidth == 16) {
+ this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
+ msg_length = 4;
+ } else
+ NOT_IMPLEMENTED;
+ this->setSrc0(insn, GenRegister::ud8grf(msg.nr, 0));
+ this->setSrc1(insn, GenRegister::immud(0));
+ setDPByteScatterGather(this,
+ insn,
+ bti,
+ elemSize,
+ GEN_BYTE_SCATTER,
+ msg_length,
+ response_length);
+ }
+
+ GenInstruction *GenEncoder::next(uint32_t opcode) {
+ GenInstruction insn;
+ std::memset(&insn, 0, sizeof(GenInstruction));
+ insn.header.opcode = opcode;
+ this->store.push_back(insn);
+ return &this->store.back();
+ }
+
+ INLINE void alu1(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src) {
+ if (needToSplitAlu1(p, dst, src) == false) {
+ GenInstruction *insn = p->next(opcode);
+ p->setHeader(insn);
+ p->setDst(insn, dst);
+ p->setSrc0(insn, src);
+ } else {
+ GenInstruction *insnQ1, *insnQ2;
+
+ // Instruction for the first quarter
+ insnQ1 = p->next(opcode);
+ p->setHeader(insnQ1);
+ insnQ1->header.quarter_control = GEN_COMPRESSION_Q1;
+ insnQ1->header.execution_size = GEN_WIDTH_8;
+ p->setDst(insnQ1, dst);
+ p->setSrc0(insnQ1, src);
+
+ // Instruction for the second quarter
+ insnQ2 = p->next(opcode);
+ p->setHeader(insnQ2);
+ insnQ2->header.quarter_control = GEN_COMPRESSION_Q2;
+ insnQ2->header.execution_size = GEN_WIDTH_8;
+ p->setDst(insnQ2, GenRegister::Qn(dst, 1));
+ p->setSrc0(insnQ2, GenRegister::Qn(src, 1));
+ }
+ }
+
+ INLINE void alu2(GenEncoder *p,
+ uint32_t opcode,
+ GenRegister dst,
+ GenRegister src0,
+ GenRegister src1)
+ {
+ if (needToSplitAlu2(p, dst, src0, src1) == false) {
+ GenInstruction *insn = p->next(opcode);
+ p->setHeader(insn);
+ p->setDst(insn, dst);
+ p->setSrc0(insn, src0);
+ p->setSrc1(insn, src1);
+ } else {
+ GenInstruction *insnQ1, *insnQ2;
+
+ // Instruction for the first quarter
+ insnQ1 = p->next(opcode);
+ p->setHeader(insnQ1);
+ insnQ1->header.quarter_control = GEN_COMPRESSION_Q1;
+ insnQ1->header.execution_size = GEN_WIDTH_8;
+ p->setDst(insnQ1, dst);
+ p->setSrc0(insnQ1, src0);
+ p->setSrc1(insnQ1, src1);
+
+ // Instruction for the second quarter
+ insnQ2 = p->next(opcode);
+ p->setHeader(insnQ2);
+ insnQ2->header.quarter_control = GEN_COMPRESSION_Q2;
+ insnQ2->header.execution_size = GEN_WIDTH_8;
+ p->setDst(insnQ2, GenRegister::Qn(dst, 1));
+ p->setSrc0(insnQ2, GenRegister::Qn(src0, 1));
+ p->setSrc1(insnQ2, GenRegister::Qn(src1, 1));
+ }
+ }
+
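+// Identity swizzle for 3-source instructions: each of the four channels selects
+// itself (2 bits per channel)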
+#define NO_SWIZZLE ((0<<0) | (1<<2) | (2<<4) | (3<<6))
+
+ static GenInstruction *alu3(GenEncoder *p,
+ uint32_t opcode,
+ GenRegister dest,
+ GenRegister src0,
+ GenRegister src1,
+ GenRegister src2)
+ {
+ GenInstruction *insn = p->next(opcode);
+
+ assert(dest.file == GEN_GENERAL_REGISTER_FILE);
+ assert(dest.nr < 128);
+ assert(dest.address_mode == GEN_ADDRESS_DIRECT);
+    assert(dest.type == GEN_TYPE_F);
+ insn->bits1.da3src.dest_reg_file = 0;
+ insn->bits1.da3src.dest_reg_nr = dest.nr;
+ insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
+ insn->bits1.da3src.dest_writemask = 0xf;
+ p->setHeader(insn);
+ insn->header.access_mode = GEN_ALIGN_16;
+ insn->header.execution_size = GEN_WIDTH_8;
+
+ assert(src0.file == GEN_GENERAL_REGISTER_FILE);
+ assert(src0.address_mode == GEN_ADDRESS_DIRECT);
+ assert(src0.nr < 128);
+ assert(src0.type == GEN_TYPE_F);
+ insn->bits2.da3src.src0_swizzle = NO_SWIZZLE;
+    insn->bits2.da3src.src0_subreg_nr = src0.subnr / 4;
+ insn->bits2.da3src.src0_reg_nr = src0.nr;
+ insn->bits1.da3src.src0_abs = src0.absolute;
+ insn->bits1.da3src.src0_negate = src0.negation;
+ insn->bits2.da3src.src0_rep_ctrl = src0.vstride == GEN_VERTICAL_STRIDE_0;
+
+ assert(src1.file == GEN_GENERAL_REGISTER_FILE);
+ assert(src1.address_mode == GEN_ADDRESS_DIRECT);
+ assert(src1.nr < 128);
+ assert(src1.type == GEN_TYPE_F);
+ insn->bits2.da3src.src1_swizzle = NO_SWIZZLE;
+ insn->bits2.da3src.src1_subreg_nr_low = (src1.subnr / 4) & 0x3;
+ insn->bits3.da3src.src1_subreg_nr_high = (src1.subnr / 4) >> 2;
+ insn->bits2.da3src.src1_rep_ctrl = src1.vstride == GEN_VERTICAL_STRIDE_0;
+ insn->bits3.da3src.src1_reg_nr = src1.nr;
+ insn->bits1.da3src.src1_abs = src1.absolute;
+ insn->bits1.da3src.src1_negate = src1.negation;
+
+ assert(src2.file == GEN_GENERAL_REGISTER_FILE);
+ assert(src2.address_mode == GEN_ADDRESS_DIRECT);
+ assert(src2.nr < 128);
+ assert(src2.type == GEN_TYPE_F);
+ insn->bits3.da3src.src2_swizzle = NO_SWIZZLE;
+ insn->bits3.da3src.src2_subreg_nr = src2.subnr / 4;
+ insn->bits3.da3src.src2_rep_ctrl = src2.vstride == GEN_VERTICAL_STRIDE_0;
+ insn->bits3.da3src.src2_reg_nr = src2.nr;
+ insn->bits1.da3src.src2_abs = src2.absolute;
+ insn->bits1.da3src.src2_negate = src2.negation;
+
+ // Emit second half of the instruction
+ if (p->curr.execWidth == 16) {
+ GenInstruction q1Insn = *insn;
+ insn = p->next(opcode);
+ *insn = q1Insn;
+ insn->header.quarter_control = GEN_COMPRESSION_Q2;
+ insn->bits1.da3src.dest_reg_nr++;
+ if (insn->bits2.da3src.src0_rep_ctrl == 0)
+ insn->bits2.da3src.src0_reg_nr++;
+ if (insn->bits2.da3src.src1_rep_ctrl == 0)
+ insn->bits3.da3src.src1_reg_nr++;
+ if (insn->bits3.da3src.src2_rep_ctrl == 0)
+ insn->bits3.da3src.src2_reg_nr++;
+ }
+
+ return insn;
+ }
+
+#undef NO_SWIZZLE
+
+#define ALU1(OP) \
+ void GenEncoder::OP(GenRegister dest, GenRegister src0) { \
+ alu1(this, GEN_OPCODE_##OP, dest, src0); \
+ }
+
+#define ALU2(OP) \
+ void GenEncoder::OP(GenRegister dest, GenRegister src0, GenRegister src1) { \
+ alu2(this, GEN_OPCODE_##OP, dest, src0, src1); \
+ }
+
+#define ALU3(OP) \
+ void GenEncoder::OP(GenRegister dest, GenRegister src0, GenRegister src1, GenRegister src2) { \
+ alu3(this, GEN_OPCODE_##OP, dest, src0, src1, src2); \
+ }
+
+ ALU1(MOV)
+ ALU1(RNDZ)
+ ALU1(RNDE)
+ ALU1(RNDD)
+ ALU1(RNDU)
+ ALU2(SEL)
+ ALU1(NOT)
+ ALU2(AND)
+ ALU2(OR)
+ ALU2(XOR)
+ ALU2(SHR)
+ ALU2(SHL)
+ ALU2(RSR)
+ ALU2(RSL)
+ ALU2(ASR)
+ ALU1(FRC)
+ ALU2(MAC)
+ ALU1(LZD)
+ ALU2(LINE)
+ ALU2(PLN)
+ ALU2(MACH)
+ ALU3(MAD)
+
+ void GenEncoder::ADD(GenRegister dest, GenRegister src0, GenRegister src1) {
+ if (src0.type == GEN_TYPE_F ||
+ (src0.file == GEN_IMMEDIATE_VALUE &&
+ src0.type == GEN_TYPE_VF)) {
+ assert(src1.type != GEN_TYPE_UD);
+ assert(src1.type != GEN_TYPE_D);
+ }
+
+ if (src1.type == GEN_TYPE_F ||
+ (src1.file == GEN_IMMEDIATE_VALUE &&
+ src1.type == GEN_TYPE_VF)) {
+ assert(src0.type != GEN_TYPE_UD);
+ assert(src0.type != GEN_TYPE_D);
+ }
+
+ alu2(this, GEN_OPCODE_ADD, dest, src0, src1);
+ }
+
+ void GenEncoder::MUL(GenRegister dest, GenRegister src0, GenRegister src1) {
+ if (src0.type == GEN_TYPE_D ||
+ src0.type == GEN_TYPE_UD ||
+ src1.type == GEN_TYPE_D ||
+ src1.type == GEN_TYPE_UD)
+ assert(dest.type != GEN_TYPE_F);
+
+ if (src0.type == GEN_TYPE_F ||
+ (src0.file == GEN_IMMEDIATE_VALUE &&
+ src0.type == GEN_TYPE_VF)) {
+ assert(src1.type != GEN_TYPE_UD);
+ assert(src1.type != GEN_TYPE_D);
+ }
+
+ if (src1.type == GEN_TYPE_F ||
+ (src1.file == GEN_IMMEDIATE_VALUE &&
+ src1.type == GEN_TYPE_VF)) {
+ assert(src0.type != GEN_TYPE_UD);
+ assert(src0.type != GEN_TYPE_D);
+ }
+
+ assert(src0.file != GEN_ARCHITECTURE_REGISTER_FILE ||
+ src0.nr != GEN_ARF_ACCUMULATOR);
+ assert(src1.file != GEN_ARCHITECTURE_REGISTER_FILE ||
+ src1.nr != GEN_ARF_ACCUMULATOR);
+
+ alu2(this, GEN_OPCODE_MUL, dest, src0, src1);
+ }
+
+ void GenEncoder::NOP(void) {
+ GenInstruction *insn = this->next(GEN_OPCODE_NOP);
+ this->setDst(insn, GenRegister::retype(GenRegister::f4grf(0,0), GEN_TYPE_UD));
+ this->setSrc0(insn, GenRegister::retype(GenRegister::f4grf(0,0), GEN_TYPE_UD));
+ this->setSrc1(insn, GenRegister::immud(0x0));
+ }
+
+ void GenEncoder::BARRIER(GenRegister src) {
+ GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ this->setHeader(insn);
+ this->setDst(insn, GenRegister::null());
+ this->setSrc0(insn, src);
+ setMessageDescriptor(this, insn, GEN_SFID_MESSAGE_GATEWAY, 1, 0);
+ insn->bits3.msg_gateway.sub_function_id = GEN_BARRIER_MSG;
+ insn->bits3.msg_gateway.notify = 0x1;
+ }
+
+ void GenEncoder::JMPI(GenRegister src) {
+ alu2(this, GEN_OPCODE_JMPI, GenRegister::ip(), GenRegister::ip(), src);
+ }
+
+ void GenEncoder::patchJMPI(uint32_t insnID, int32_t jumpDistance) {
+    assert(insnID < this->store.size());
+    GenInstruction &insn = this->store[insnID];
+    assert(insn.header.opcode == GEN_OPCODE_JMPI);
+ this->setSrc1(&insn, GenRegister::immd(jumpDistance));
+ }
+
+ void GenEncoder::CMP(uint32_t conditional, GenRegister src0, GenRegister src1) {
+ if (needToSplitCmp(this, src0, src1) == false) {
+ GenInstruction *insn = this->next(GEN_OPCODE_CMP);
+ this->setHeader(insn);
+ insn->header.destreg_or_condmod = conditional;
+ this->setDst(insn, GenRegister::null());
+ this->setSrc0(insn, src0);
+ this->setSrc1(insn, src1);
+ } else {
+ GenInstruction *insnQ1, *insnQ2;
+
+ // Instruction for the first quarter
+ insnQ1 = this->next(GEN_OPCODE_CMP);
+ this->setHeader(insnQ1);
+ insnQ1->header.quarter_control = GEN_COMPRESSION_Q1;
+ insnQ1->header.execution_size = GEN_WIDTH_8;
+ insnQ1->header.destreg_or_condmod = conditional;
+ this->setDst(insnQ1, GenRegister::null());
+ this->setSrc0(insnQ1, src0);
+ this->setSrc1(insnQ1, src1);
+
+ // Instruction for the second quarter
+ insnQ2 = this->next(GEN_OPCODE_CMP);
+ this->setHeader(insnQ2);
+ insnQ2->header.quarter_control = GEN_COMPRESSION_Q2;
+ insnQ2->header.execution_size = GEN_WIDTH_8;
+ insnQ2->header.destreg_or_condmod = conditional;
+ this->setDst(insnQ2, GenRegister::null());
+ this->setSrc0(insnQ2, GenRegister::Qn(src0, 1));
+ this->setSrc1(insnQ2, GenRegister::Qn(src1, 1));
+ }
+ }
+
+ void GenEncoder::SEL_CMP(uint32_t conditional,
+ GenRegister dst,
+ GenRegister src0,
+ GenRegister src1)
+ {
+ GenInstruction *insn = this->next(GEN_OPCODE_SEL);
+ GBE_ASSERT(curr.predicate == GEN_PREDICATE_NONE);
+ this->setHeader(insn);
+ insn->header.destreg_or_condmod = conditional;
+ this->setDst(insn, dst);
+ this->setSrc0(insn, src0);
+ this->setSrc1(insn, src1);
+ }
+
+ void GenEncoder::WAIT(void) {
+ GenInstruction *insn = this->next(GEN_OPCODE_WAIT);
+ GenRegister src = GenRegister::notification1();
+ this->setDst(insn, GenRegister::null());
+ this->setSrc0(insn, src);
+ this->setSrc1(insn, GenRegister::null());
+    insn->header.execution_size = 0; /* must be GEN_WIDTH_1 (i.e. execution size 1) */
+ insn->header.predicate_control = 0;
+ insn->header.quarter_control = 0;
+ }
+
+ void GenEncoder::MATH(GenRegister dst, uint32_t function, GenRegister src0, GenRegister src1) {
+ GenInstruction *insn = this->next(GEN_OPCODE_MATH);
+ assert(dst.file == GEN_GENERAL_REGISTER_FILE);
+ assert(src0.file == GEN_GENERAL_REGISTER_FILE);
+ assert(src1.file == GEN_GENERAL_REGISTER_FILE);
+ assert(dst.hstride == GEN_HORIZONTAL_STRIDE_1);
+
+ if (function == GEN_MATH_FUNCTION_INT_DIV_QUOTIENT ||
+ function == GEN_MATH_FUNCTION_INT_DIV_REMAINDER ||
+ function == GEN_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
+ assert(src0.type != GEN_TYPE_F);
+ assert(src1.type != GEN_TYPE_F);
+ } else {
+ assert(src0.type == GEN_TYPE_F);
+ assert(src1.type == GEN_TYPE_F);
+ }
+
+ insn->header.destreg_or_condmod = function;
+ this->setHeader(insn);
+ this->setDst(insn, dst);
+ this->setSrc0(insn, src0);
+ this->setSrc1(insn, src1);
+ }
+
+ void GenEncoder::MATH(GenRegister dst, uint32_t function, GenRegister src) {
+ GenInstruction *insn = this->next(GEN_OPCODE_MATH);
+ assert(dst.file == GEN_GENERAL_REGISTER_FILE);
+ assert(src.file == GEN_GENERAL_REGISTER_FILE);
+ assert(dst.hstride == GEN_HORIZONTAL_STRIDE_1);
+ assert(src.type == GEN_TYPE_F);
+
+ insn->header.destreg_or_condmod = function;
+ this->setHeader(insn);
+ this->setDst(insn, dst);
+ this->setSrc0(insn, src);
+ }
+
+ void GenEncoder::SAMPLE(GenRegister dest,
+ uint32_t msg_reg_nr,
+ GenRegister src0,
+ uint32_t bti,
+ uint32_t sampler,
+ uint32_t writemask,
+ uint32_t msg_type,
+ uint32_t response_length,
+ uint32_t msg_length,
+ uint32_t header_present,
+ uint32_t simd_mode,
+ uint32_t return_format)
+ {
+ if (writemask == 0) return;
+
+ GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ insn->header.predicate_control = 0; /* XXX */
+ this->setHeader(insn);
+ this->setDst(insn, dest);
+ this->setSrc0(insn, src0);
+ setSamplerMessage(this,
+ insn,
+ bti,
+ sampler,
+ msg_type,
+ response_length,
+ msg_length,
+ header_present,
+ simd_mode,
+ return_format);
+ }
+
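+  // End-of-thread: send the final message to the thread spawner with the EOT
+  // bit set so the EU can retire the thread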
+ void GenEncoder::EOT(uint32_t msg) {
+ GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
+ this->setSrc0(insn, GenRegister::ud8grf(msg,0));
+ this->setSrc1(insn, GenRegister::immud(0));
+ insn->header.execution_size = GEN_WIDTH_8;
+ insn->bits3.spawner_gen5.resource = GEN_DO_NOT_DEREFERENCE_URB;
+ insn->bits3.spawner_gen5.msg_length = 1;
+ insn->bits3.spawner_gen5.end_of_thread = 1;
+ insn->header.destreg_or_condmod = GEN_SFID_THREAD_SPAWNER;
+ }
+} /* namespace gbe */
+
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
new file mode 100644
index 0000000..3ad52e7
--- /dev/null
+++ b/backend/src/backend/gen_encoder.hpp
@@ -0,0 +1,177 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith at tungstengraphics.com>
+ */
+
+#ifndef __GBE_GEN_ENCODER_HPP__
+#define __GBE_GEN_ENCODER_HPP__
+
+#include "backend/gen_defs.hpp"
+#include "backend/gen_register.hpp"
+#include "sys/platform.hpp"
+#include "sys/vector.hpp"
+#include <cassert>
+
+namespace gbe
+{
+ /*! Helper structure to emit Gen instructions */
+ class GenEncoder
+ {
+ public:
+ /*! simdWidth is the default width for the instructions */
+ GenEncoder(uint32_t simdWidth, uint32_t gen);
+ /*! Size of the stack (should be large enough) */
+ enum { MAX_STATE_NUM = 16 };
+ /*! Push the current instruction state */
+ void push(void);
+ /*! Pop the latest pushed state */
+ void pop(void);
+ /*! The instruction stream we are building */
+ vector<GenInstruction> store;
+ /*! Current instruction state to use */
+ GenInstructionState curr;
+ /*! State used to encode the instructions */
+ GenInstructionState stack[MAX_STATE_NUM];
+ /*! Number of states currently pushed */
+ uint32_t stateNum;
+ /*! Gen generation to encode */
+ uint32_t gen;
+
+ ////////////////////////////////////////////////////////////////////////
+ // Encoding functions
+ ////////////////////////////////////////////////////////////////////////
+
+#define ALU1(OP) void OP(GenRegister dest, GenRegister src0);
+#define ALU2(OP) void OP(GenRegister dest, GenRegister src0, GenRegister src1);
+#define ALU3(OP) void OP(GenRegister dest, GenRegister src0, GenRegister src1, GenRegister src2);
+ ALU1(MOV)
+ ALU1(RNDZ)
+ ALU1(RNDE)
+ ALU1(RNDD)
+ ALU1(RNDU)
+ ALU2(SEL)
+ ALU1(NOT)
+ ALU2(AND)
+ ALU2(OR)
+ ALU2(XOR)
+ ALU2(SHR)
+ ALU2(SHL)
+ ALU2(RSR)
+ ALU2(RSL)
+ ALU2(ASR)
+ ALU2(ADD)
+ ALU2(MUL)
+ ALU1(FRC)
+ ALU2(MAC)
+ ALU2(MACH)
+ ALU1(LZD)
+ ALU2(LINE)
+ ALU2(PLN)
+ ALU3(MAD)
+#undef ALU1
+#undef ALU2
+#undef ALU3
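+ // For reference, the ALU* macros above only declare the encoder methods;
+ // ALU2(ADD), for instance, expands to
+ //   void ADD(GenRegister dest, GenRegister src0, GenRegister src1);
+ // (expansion shown here as a sketch to make the declaration list easier to read)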
+ /*! Barrier message (to synchronize threads of a workgroup) */
+ void BARRIER(GenRegister src);
+ /*! Jump indexed instruction */
+ void JMPI(GenRegister src);
+ /*! Compare instructions */
+ void CMP(uint32_t conditional, GenRegister src0, GenRegister src1);
+ /*! Select with embedded compare (like sel.le ...) */
+ void SEL_CMP(uint32_t conditional, GenRegister dst, GenRegister src0, GenRegister src1);
+ /*! EOT is used to finish GPGPU threads */
+ void EOT(uint32_t msg_nr);
+ /*! No-op */
+ void NOP(void);
+ /*! Wait instruction (used for the barrier) */
+ void WAIT(void);
+ /*! Untyped read (up to 4 channels) */
+ void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum);
+ /*! Untyped write (up to 4 channels) */
+ void UNTYPED_WRITE(GenRegister src, uint32_t bti, uint32_t elemNum);
+ /*! Byte gather (for unaligned bytes, shorts and ints) */
+ void BYTE_GATHER(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemSize);
+ /*! Byte scatter (for unaligned bytes, shorts and ints) */
+ void BYTE_SCATTER(GenRegister src, uint32_t bti, uint32_t elemSize);
+ /*! Send instruction for the sampler */
+ void SAMPLE(GenRegister dest,
+ uint32_t msg_reg_nr,
+ GenRegister src0,
+ uint32_t bti,
+ uint32_t sampler,
+ uint32_t writemask,
+ uint32_t msg_type,
+ uint32_t response_length,
+ uint32_t msg_length,
+ uint32_t header_present,
+ uint32_t simd_mode,
+ uint32_t return_format);
+ /*! Extended math function (2 sources) */
+ void MATH(GenRegister dst, uint32_t function, GenRegister src0, GenRegister src1);
+ /*! Extended math function (1 source) */
+ void MATH(GenRegister dst, uint32_t function, GenRegister src);
+
+ /*! Patch JMPI (located at index insnID) with the given jump distance */
+ void patchJMPI(uint32_t insnID, int32_t jumpDistance);
+
+ ////////////////////////////////////////////////////////////////////////
+ // Helper functions to encode
+ ////////////////////////////////////////////////////////////////////////
+ void setHeader(GenInstruction *insn);
+ void setDst(GenInstruction *insn, GenRegister dest);
+ void setSrc0(GenInstruction *insn, GenRegister reg);
+ void setSrc1(GenInstruction *insn, GenRegister reg);
+ GenInstruction *next(uint32_t opcode);
+ GBE_CLASS(GenEncoder); //!< Use custom allocators
+ };
+
+} /* namespace gbe */
+
+#endif /* __GBE_GEN_ENCODER_HPP__ */
+
+
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
new file mode 100644
index 0000000..d9daad6
--- /dev/null
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -0,0 +1,17 @@
+// Family Latency SIMD16 SIMD8
+DECL_GEN7_SCHEDULE(Label, 0, 0, 0)
+DECL_GEN7_SCHEDULE(Unary, 20, 4, 2)
+DECL_GEN7_SCHEDULE(Binary, 20, 4, 2)
+DECL_GEN7_SCHEDULE(Ternary, 20, 4, 2)
+DECL_GEN7_SCHEDULE(Compare, 20, 4, 2)
+DECL_GEN7_SCHEDULE(Jump, 14, 1, 1)
+DECL_GEN7_SCHEDULE(Eot, 20, 1, 1)
+DECL_GEN7_SCHEDULE(NoOp, 20, 2, 2)
+DECL_GEN7_SCHEDULE(Wait, 20, 2, 2)
+DECL_GEN7_SCHEDULE(Math, 20, 4, 2)
+DECL_GEN7_SCHEDULE(Barrier, 80, 1, 1)
+DECL_GEN7_SCHEDULE(UntypedRead, 80, 1, 1)
+DECL_GEN7_SCHEDULE(UntypedWrite, 80, 1, 1)
+DECL_GEN7_SCHEDULE(ByteGather, 80, 1, 1)
+DECL_GEN7_SCHEDULE(ByteScatter, 80, 1, 1)
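+// This table is consumed by defining DECL_GEN7_SCHEDULE before including the
+// file. As a sketch of how gen_insn_scheduling.cpp uses it:
+//   #define DECL_GEN7_SCHEDULE(FAMILY, LATENCY, SIMD16, SIMD8) \
+//     const uint32_t FAMILY##InstructionLatency = LATENCY;
+//   #include "gen_insn_gen7_schedule_info.hxx"
+//   #undef DECL_GEN7_SCHEDULE
+// so the first data row yields: const uint32_t LabelInstructionLatency = 0;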
+
diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp
new file mode 100644
index 0000000..01c525e
--- /dev/null
+++ b/backend/src/backend/gen_insn_scheduling.cpp
@@ -0,0 +1,597 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file gen_insn_scheduling.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/*
+ * Overall idea:
+ * =============
+ *
+ * This is the instruction scheduling part of the code. With Gen, we actually
+ * have a simple strategy to follow. Indeed, here are the constraints:
+ *
+ * 1 - the number of registers per HW thread is constant and given (128 32-byte
+ * GRFs per thread). So, we can use all these registers with no penalty
+ * 2 - spilling is super bad. Instruction latency matters, but the top priority
+ * is to avoid spilling as much as possible
+ *
+ *
+ * We schedule twice, each time using a local forward list scheduler.
+ *
+ * Before the register allocation
+ * ==============================
+ *
+ * We try to limit the register pressure.
+ * This is a hard problem, but we now have a decent strategy that we call
+ * "zero-cycle LIFO scheduling".
+ * We use a local forward list scheduler and schedule the instructions in
+ * LIFO order, i.e. as a stack. Basically, we take the most recent instruction
+ * and schedule it right away. We completely ignore the real latencies and
+ * throughputs and simply simulate instructions that are issued and completed in
+ * zero cycles. For the complex kernels we already have (like the Menger sponge),
+ * this is a pretty good strategy: it enables SIMD16 code generation in cases
+ * where, with scheduling deactivated, even SIMD8 fails.
+ *
+ * One may argue that this strategy is bad latency-wise. This is not true, since
+ * the register allocator will try to burn as many registers as possible anyway.
+ * So, there are still opportunities to schedule after register allocation.
+ *
+ * Our idea seems to work decently. There is, however, a strong research article
+ * that is able to near-optimally reschedule the instructions to minimize
+ * register use. This is:
+ *
+ * "Minimum Register Instruction Sequence Problem: Revisiting Optimal Code
+ * Generation for DAGs"
+ *
+ * After the register allocation
+ * ==============================
+ *
+ * Here we use a pretty simple strategy based on a regular forward list
+ * scheduler. Since Gen is a co-issue based machine, it is pointless to take
+ * very precise timings into account: instruction issues will happen
+ * out-of-order depending on the other threads being executed.
+ *
+ * Note that we over-simplify the problem. Indeed, the Gen register file is
+ * flexible and we are able to use sub-registers of a GRF, in particular when we
+ * handle uniforms or mask registers which are spilled into GRFs. The thing is
+ * that two uniforms may not interfere even if they belong to the same GRF (i.e.
+ * they use two different sub-registers). This means that the interference
+ * relation is not transitive for Gen. To simplify everything, we just consider
+ * full GRFs (in SIMD8) or pairs of full GRFs (in SIMD16), regardless of whether
+ * the register is a uniform, a mask or a regular GRF.
+ *
+ * Obviously, this leads to extra dependencies in the code.
+ */
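+
+/* As a rough sketch of the idea (illustrative only, the real loop is
+ * SelectionScheduler::scheduleDAG() below): a forward list scheduler keeps a
+ * "ready" set of instructions whose dependencies have all been scheduled and
+ * repeatedly picks one of them:
+ *
+ *   while (!ready.empty()) {
+ *     // pre-allocation: LIFO pick (most recently readied instruction)
+ *     // post-allocation: FIFO pick (oldest ready instruction)
+ *     Node *n = (policy == PRE_ALLOC) ? ready.back() : ready.front();
+ *     ready.remove(n);
+ *     emit(n);
+ *     for (Node *child : n->children)
+ *       if (--child->pendingDeps == 0)
+ *         ready.push_back(child);
+ *   }
+ *
+ * Names such as Node, pendingDeps and emit() are purely illustrative and do
+ * not exist in this file.
+ */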
+
+#include "backend/gen_insn_selection.hpp"
+#include "backend/gen_reg_allocation.hpp"
+#include "sys/cvar.hpp"
+#include "sys/intrusive_list.hpp"
+
+namespace gbe
+{
+ // Helper structure to schedule the basic blocks
+ struct SelectionScheduler;
+
+ // Node for the schedule DAG
+ struct ScheduleDAGNode;
+
+ /*! We need to chain together the nodes we point to */
+ struct ScheduleListNode : public intrusive_list_node
+ {
+ INLINE ScheduleListNode(ScheduleDAGNode *node) : node(node) {}
+ ScheduleDAGNode *node;
+ };
+
+ /*! Node of the DAG */
+ struct ScheduleDAGNode
+ {
+ INLINE ScheduleDAGNode(SelectionInstruction &insn) :
+ insn(insn), refNum(0), retiredCycle(0) {}
+ bool dependsOn(ScheduleDAGNode *node) const {
+ GBE_ASSERT(node != NULL);
+ for (auto child : node->children)
+ if (child.node == this)
+ return true;
+ return false;
+ }
+ /*! Children that depend on us */
+ intrusive_list<ScheduleListNode> children;
+ /*! Instruction after code selection */
+ SelectionInstruction &insn;
+ /*! Number of nodes that point to us (i.e. nodes we depend on) */
+ uint32_t refNum;
+ /*! Cycle when the instruction is retired */
+ uint32_t retiredCycle;
+ };
+
+ /*! To track loads and stores */
+ enum GenMemory : uint8_t {
+ GLOBAL_MEMORY = 0,
+ LOCAL_MEMORY,
+ MAX_MEM_SYSTEM
+ };
+
+ /*! Do we schedule before or after the register allocation? */
+ enum SchedulePolicy {
+ PRE_ALLOC = 0, // LIFO scheduling (tends to limit register pressure)
+ POST_ALLOC // FIFO scheduling (limits latency problems)
+ };
+
+ /*! Helper structure to handle dependencies while scheduling. Takes into
+ * account virtual and physical registers and memory sub-systems
+ */
+ struct DependencyTracker : public NonCopyable
+ {
+ DependencyTracker(const Selection &selection, SelectionScheduler &scheduler);
+ /*! Reset it before scheduling a new block */
+ void clear(void);
+ /*! Get an index in the node array for the given register */
+ uint32_t getIndex(GenRegister reg) const;
+ /*! Get an index in the node array for the given memory system */
+ uint32_t getIndex(uint32_t bti) const;
+ /*! Add a new dependency "node0 depends on node1" */
+ void addDependency(ScheduleDAGNode *node0, ScheduleDAGNode *node1);
+ /*! Add a new dependency "node0 depends on node located at index" */
+ void addDependency(ScheduleDAGNode *node0, uint32_t index);
+ /*! Add a new dependency "node located at index depends on node0" */
+ void addDependency(uint32_t index, ScheduleDAGNode *node0);
+ /*! No dependency for null registers and immediates */
+ INLINE bool ignoreDependency(GenRegister reg) const {
+ if (reg.file == GEN_IMMEDIATE_VALUE)
+ return true;
+ else if (reg.file == GEN_ARCHITECTURE_REGISTER_FILE) {
+ if ((reg.nr & 0xf0) == GEN_ARF_NULL)
+ return true;
+ }
+ return false;
+ }
+ /*! Add a new dependency "node0 depends on node set for register reg" */
+ INLINE void addDependency(ScheduleDAGNode *node0, GenRegister reg) {
+ if (this->ignoreDependency(reg) == false) {
+ const uint32_t index = this->getIndex(reg);
+ this->addDependency(node0, index);
+ }
+ }
+ /*! Add a new dependency "node set for register reg depends on node0" */
+ INLINE void addDependency(GenRegister reg, ScheduleDAGNode *node0) {
+ if (this->ignoreDependency(reg) == false) {
+ const uint32_t index = this->getIndex(reg);
+ this->addDependency(index, node0);
+ }
+ }
+ /*! Make the node located at insnID a barrier */
+ void makeBarrier(int32_t insnID, int32_t insnNum);
+ /*! Update all the writes (memory, predicates, registers) */
+ void updateWrites(ScheduleDAGNode *node);
+ /*! Maximum number of *physical* flag registers */
+ static const uint32_t MAX_FLAG_REGISTER = 8u;
+ /*! Maximum number of *physical* accumulator registers */
+ static const uint32_t MAX_ACC_REGISTER = 1u;
+ /*! Owns the tracker */
+ SelectionScheduler &scheduler;
+ /*! Stores the last node that wrote to a register / memory ... */
+ vector<ScheduleDAGNode*> nodes;
+ /*! Stores the nodes per instruction */
+ vector<ScheduleDAGNode*> insnNodes;
+ /*! Number of registers tracked (virtual registers before allocation, physical GRFs after) */
+ uint32_t grfNum;
+ };
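+
+ /* To make the tracked dependency kinds concrete, consider this illustrative
+ * three-instruction sequence (not actual selection IR):
+ *
+ *   (0) ADD r0, r1, r2
+ *   (1) MUL r3, r0, r4   // RAW on r0: (1) depends on (0)
+ *   (2) MOV r0, r5       // WAW on r0: (2) depends on (0)
+ *                        // WAR on r0: (2) depends on (1)
+ *
+ * The tracker stores, per register / memory system, the last node that wrote
+ * it (nodes[]); a forward sweep over the block therefore yields the RAW and
+ * WAW edges, and a second, backward sweep yields the WAR edges (see
+ * SelectionScheduler::buildDAG below).
+ */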
+
+ /*! Perform the instruction scheduling */
+ struct SelectionScheduler : public NonCopyable
+ {
+ /*! Init the book keeping structures */
+ SelectionScheduler(GenContext &ctx, Selection &selection, SchedulePolicy policy);
+ /*! Make all lists empty */
+ void clearLists(void);
+ /*! Return the number of instructions to schedule in the DAG */
+ int32_t buildDAG(SelectionBlock &bb);
+ /*! Schedule the DAG */
+ void scheduleDAG(SelectionBlock &bb, int32_t insnNum);
+ /*! To limit register pressure or limit insn latency problems */
+ SchedulePolicy policy;
+ /*! Make ScheduleListNode allocation faster */
+ DECL_POOL(ScheduleListNode, listPool);
+ /*! Make ScheduleDAGNode allocation faster */
+ DECL_POOL(ScheduleDAGNode, nodePool);
+ /*! The ready list holds the instructions that can be scheduled */
+ intrusive_list<ScheduleListNode> ready;
+ /*! The active list holds the instructions that are executing */
+ intrusive_list<ScheduleListNode> active;
+ /*! Handle complete compilation */
+ GenContext &ctx;
+ /*! Code to schedule */
+ Selection &selection;
+ /*! To help tracking dependencies */
+ DependencyTracker tracker;
+ };
+
+ DependencyTracker::DependencyTracker(const Selection &selection, SelectionScheduler &scheduler) :
+ scheduler(scheduler)
+ {
+ if (scheduler.policy == PRE_ALLOC) {
+ this->grfNum = selection.getRegNum();
+ nodes.resize(grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_MEM_SYSTEM);
+ } else {
+ const uint32_t simdWidth = scheduler.ctx.getSimdWidth();
+ GBE_ASSERT(simdWidth == 8 || simdWidth == 16);
+ this->grfNum = simdWidth == 8 ? 128 : 64;
+ nodes.resize(grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_MEM_SYSTEM);
+ }
+ insnNodes.resize(selection.getLargestBlockSize());
+ }
+
+ void DependencyTracker::clear(void) { for (auto &x : nodes) x = NULL; }
+
+ void DependencyTracker::addDependency(ScheduleDAGNode *node0, ScheduleDAGNode *node1) {
+ if (node0 != NULL && node1 != NULL && node0 != node1 && node0->dependsOn(node1) == false) {
+ ScheduleListNode *dep = scheduler.newScheduleListNode(node0);
+ node0->refNum++;
+ node1->children.push_back(dep);
+ }
+ }
+
+ void DependencyTracker::addDependency(ScheduleDAGNode *node, uint32_t index) {
+ this->addDependency(node, this->nodes[index]);
+ }
+
+ void DependencyTracker::addDependency(uint32_t index, ScheduleDAGNode *node) {
+ this->addDependency(this->nodes[index], node);
+ }
+
+ void DependencyTracker::makeBarrier(int32_t barrierID, int32_t insnNum) {
+ ScheduleDAGNode *barrier = this->insnNodes[barrierID];
+
+ // The barrier depends on all nodes before it
+ for (int32_t insnID = 0; insnID < barrierID; ++insnID)
+ this->addDependency(barrier, this->insnNodes[insnID]);
+
+ // All nodes after barriers depend on the barrier
+ for (int32_t insnID = barrierID + 1; insnID < insnNum; ++insnID)
+ this->addDependency(this->insnNodes[insnID], barrier);
+ }
+
+ static GenRegister getFlag(const SelectionInstruction &insn) {
+ if (insn.state.physicalFlag) {
+ const uint32_t nr = insn.state.flag;
+ const uint32_t subnr = insn.state.subFlag;
+ return GenRegister::flag(nr, subnr);
+ } else
+ return GenRegister::uw1grf(ir::Register(insn.state.flagIndex));
+ }
+
+ uint32_t DependencyTracker::getIndex(GenRegister reg) const {
+ // Non GRF physical register
+ if (reg.physical) {
+ GBE_ASSERT (reg.file == GEN_ARCHITECTURE_REGISTER_FILE);
+ const uint32_t file = reg.nr & 0xf0;
+ const uint32_t nr = reg.nr & 0x0f;
+ if (file == GEN_ARF_FLAG) {
+ const uint32_t subnr = reg.subnr / sizeof(uint16_t);
+ GBE_ASSERT(nr < MAX_FLAG_REGISTER && (subnr == 0 || subnr == 1));
+ return grfNum + 2*nr + subnr;
+ } else if (file == GEN_ARF_ACCUMULATOR) {
+ GBE_ASSERT(nr < MAX_ACC_REGISTER);
+ return grfNum + MAX_FLAG_REGISTER + nr;
+ } else {
+ NOT_SUPPORTED;
+ return 0;
+ }
+ }
+ // We directly manipulate physical GRFs here
+ else if (scheduler.policy == POST_ALLOC) {
+ const GenRegister physical = scheduler.ctx.ra->genReg(reg);
+ const uint32_t simdWidth = scheduler.ctx.getSimdWidth();
+ return simdWidth == 8 ? physical.nr : physical.nr / 2;
+ }
+ // We use virtual registers since allocation is not done yet
+ else
+ return reg.value.reg;
+ }
+
+ uint32_t DependencyTracker::getIndex(uint32_t bti) const {
+ const uint32_t memDelta = grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER;
+ return bti == 0xfe ? memDelta + LOCAL_MEMORY : memDelta + GLOBAL_MEMORY;
+ }
+
+ void DependencyTracker::updateWrites(ScheduleDAGNode *node) {
+ const SelectionInstruction &insn = node->insn;
+
+ // Track writes in registers
+ for (uint32_t dstID = 0; dstID < insn.dstNum; ++dstID) {
+ const GenRegister dst = insn.dst(dstID);
+ if (this->ignoreDependency(dst) == false) {
+ const uint32_t index = this->getIndex(dst);
+ this->nodes[index] = node;
+ }
+ }
+
+ // Track writes in predicates
+ if (insn.opcode == SEL_OP_CMP) {
+ const uint32_t index = this->getIndex(getFlag(insn));
+ this->nodes[index] = node;
+ }
+
+ // Track writes in accumulators
+ if (insn.state.accWrEnable) {
+ const uint32_t index = this->getIndex(GenRegister::acc());
+ this->nodes[index] = node;
+ }
+
+ // Track writes in memory
+ if (insn.isWrite()) {
+ const uint32_t index = this->getIndex(insn.extra.function);
+ this->nodes[index] = node;
+ }
+
+ // Consider that barrier and wait instructions write to memory
+ if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
+ const uint32_t local = this->getIndex(0xfe);
+ const uint32_t global = this->getIndex(0x00);
+ this->nodes[local] = this->nodes[global] = node;
+ }
+ }
+
+ /*! Kind-of roughly estimated latency. Nothing real here */
+ static uint32_t getLatencyGen7(const SelectionInstruction &insn) {
+#define DECL_GEN7_SCHEDULE(FAMILY, LATENCY, SIMD16, SIMD8)\
+ const uint32_t FAMILY##InstructionLatency = LATENCY;
+#include "gen_insn_gen7_schedule_info.hxx"
+#undef DECL_GEN7_SCHEDULE
+
+ switch (insn.opcode) {
+#define DECL_SELECTION_IR(OP, FAMILY) case SEL_OP_##OP: return FAMILY##Latency;
+#include "backend/gen_insn_selection.hxx"
+#undef DECL_SELECTION_IR
+ };
+ return 0;
+ }
+
+ /*! Throughput in cycles for SIMD8 or SIMD16 */
+ static uint32_t getThroughputGen7(const SelectionInstruction &insn, bool isSIMD8) {
+#define DECL_GEN7_SCHEDULE(FAMILY, LATENCY, SIMD16, SIMD8)\
+ const uint32_t FAMILY##InstructionThroughput = isSIMD8 ? SIMD8 : SIMD16;
+#include "gen_insn_gen7_schedule_info.hxx"
+#undef DECL_GEN7_SCHEDULE
+
+ switch (insn.opcode) {
+#define DECL_SELECTION_IR(OP, FAMILY) case SEL_OP_##OP: return FAMILY##Throughput;
+#include "backend/gen_insn_selection.hxx"
+#undef DECL_SELECTION_IR
+ };
+ return 0;
+ }
+
+ SelectionScheduler::SelectionScheduler(GenContext &ctx,
+ Selection &selection,
+ SchedulePolicy policy) :
+ policy(policy), listPool(nextHighestPowerOf2(selection.getLargestBlockSize())),
+ ctx(ctx), selection(selection), tracker(selection, *this)
+ {
+ this->clearLists();
+ }
+
+ void SelectionScheduler::clearLists(void) {
+ this->ready.fast_clear();
+ this->active.fast_clear();
+ }
+
+ int32_t SelectionScheduler::buildDAG(SelectionBlock &bb) {
+ nodePool.rewind();
+ listPool.rewind();
+ tracker.clear();
+ this->clearLists();
+
+ // Track write-after-write and read-after-write dependencies
+ int32_t insnNum = 0;
+ for (auto &insn : bb.insnList) {
+ // Create a new node for this instruction
+ ScheduleDAGNode *node = this->newScheduleDAGNode(insn);
+ tracker.insnNodes[insnNum++] = node;
+
+ // read-after-write in registers
+ for (uint32_t srcID = 0; srcID < insn.srcNum; ++srcID)
+ tracker.addDependency(node, insn.src(srcID));
+
+ // read-after-write for predicate
+ if (insn.state.predicate != GEN_PREDICATE_NONE)
+ tracker.addDependency(node, getFlag(insn));
+
+ // read-after-write in memory
+ if (insn.isRead()) {
+ const uint32_t index = tracker.getIndex(insn.extra.function);
+ tracker.addDependency(node, index);
+ }
+
+ // Consider that barrier and wait instructions read memory (local and global)
+ if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
+ const uint32_t local = tracker.getIndex(0xfe);
+ const uint32_t global = tracker.getIndex(0x00);
+ tracker.addDependency(node, local);
+ tracker.addDependency(node, global);
+ }
+
+ // write-after-write in registers
+ for (uint32_t dstID = 0; dstID < insn.dstNum; ++dstID)
+ tracker.addDependency(node, insn.dst(dstID));
+
+ // write-after-write for predicate
+ if (insn.opcode == SEL_OP_CMP)
+ tracker.addDependency(node, getFlag(insn));
+
+ // write-after-write for accumulators
+ if (insn.state.accWrEnable)
+ tracker.addDependency(node, GenRegister::acc());
+
+ // write-after-write in memory
+ if (insn.isWrite()) {
+ const uint32_t index = tracker.getIndex(insn.extra.function);
+ tracker.addDependency(node, index);
+ }
+
+ // Consider that barrier and wait instructions write memory (local and global)
+ if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
+ const uint32_t local = tracker.getIndex(0xfe);
+ const uint32_t global = tracker.getIndex(0x00);
+ tracker.addDependency(node, local);
+ tracker.addDependency(node, global);
+ }
+
+ // Track all writes done by the instruction
+ tracker.updateWrites(node);
+ }
+
+ // Track write-after-read dependencies
+ tracker.clear();
+ for (int32_t insnID = insnNum-1; insnID >= 0; --insnID) {
+ ScheduleDAGNode *node = tracker.insnNodes[insnID];
+ const SelectionInstruction &insn = node->insn;
+
+ // write-after-read in registers
+ for (uint32_t srcID = 0; srcID < insn.srcNum; ++srcID)
+ tracker.addDependency(insn.src(srcID), node);
+
+ // write-after-read for predicate
+ if (insn.state.predicate != GEN_PREDICATE_NONE)
+ tracker.addDependency(getFlag(insn), node);
+
+ // write-after-read in memory
+ if (insn.isRead()) {
+ const uint32_t index = tracker.getIndex(insn.extra.function);
+ tracker.addDependency(index, node);
+ }
+
+ // Consider that barrier and wait instructions read memory (local and global)
+ if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
+ const uint32_t local = tracker.getIndex(0xfe);
+ const uint32_t global = tracker.getIndex(0x00);
+ tracker.addDependency(local, node);
+ tracker.addDependency(global, node);
+ }
+
+ // Track all writes done by the instruction
+ tracker.updateWrites(node);
+ }
+
+ // Make labels and branches non-schedulable (i.e. they act as barriers)
+ for (int32_t insnID = 0; insnID < insnNum; ++insnID) {
+ ScheduleDAGNode *node = tracker.insnNodes[insnID];
+ if (node->insn.isBranch() || node->insn.isLabel() || node->insn.opcode == SEL_OP_EOT)
+ tracker.makeBarrier(insnID, insnNum);
+ }
+
+ // Build the initial ready list (should only be the label actually)
+ for (int32_t insnID = 0; insnID < insnNum; ++insnID) {
+ ScheduleDAGNode *node = tracker.insnNodes[insnID];
+ if (node->refNum == 0) {
+ ScheduleListNode *listNode = this->newScheduleListNode(node);
+ this->ready.push_back(listNode);
+ }
+ }
+
+ return insnNum;
+ }
+
+ void SelectionScheduler::scheduleDAG(SelectionBlock &bb, int32_t insnNum) {
+ uint32_t cycle = 0;
+ const bool isSIMD8 = this->ctx.getSimdWidth() == 8;
+ while (insnNum) {
+
+ // Retire all the instructions that finished
+ for (auto toRetireIt = active.begin(); toRetireIt != active.end();) {
+ ScheduleDAGNode *toRetireNode = toRetireIt.node()->node;
+ // Instruction is now complete
+ if (toRetireNode->retiredCycle <= cycle) {
+ toRetireIt = this->active.erase(toRetireIt);
+ // Traverse all children and make them ready if no more dependency
+ auto &children = toRetireNode->children;
+ for (auto it = children.begin(); it != children.end();) {
+ if (--it->node->refNum == 0) {
+ ScheduleListNode *listNode = it.node();
+ it = children.erase(it);
+ this->ready.push_back(listNode);
+ } else
+ ++it;
+ }
+ }
+ // Get the next one
+ else
+ ++toRetireIt;
+ }
+
+ // Try to schedule something from the ready list
+ intrusive_list<ScheduleListNode>::iterator toSchedule;
+ if (policy == POST_ALLOC) // FIFO scheduling
+ toSchedule = this->ready.begin();
+ else // LIFO scheduling
+ toSchedule = this->ready.rbegin();
+
+ if (toSchedule != this->ready.end()) {
+ // Pre-allocation scheduling simulates zero-cycle issue, so the issue
+ // throughput is only charged when we schedule after register allocation
+ if (policy == POST_ALLOC)
+ cycle += getThroughputGen7(toSchedule->node->insn, isSIMD8);
+
+ this->ready.erase(toSchedule);
+ this->active.push_back(toSchedule.node());
+ // When we schedule before allocation, the instruction retires
+ // instantaneously. This allows us to have a true LIFO strategy
+ if (policy == POST_ALLOC)
+ toSchedule->node->retiredCycle = cycle + getLatencyGen7(toSchedule->node->insn);
+ else
+ toSchedule->node->retiredCycle = cycle;
+ bb.append(&toSchedule->node->insn);
+ insnNum--;
+ } else
+ cycle++;
+ }
+ }
+
+ BVAR(OCL_POST_ALLOC_INSN_SCHEDULE, true);
+ BVAR(OCL_PRE_ALLOC_INSN_SCHEDULE, true);
+
+ void schedulePostRegAllocation(GenContext &ctx, Selection &selection) {
+ if (OCL_POST_ALLOC_INSN_SCHEDULE) {
+ SelectionScheduler scheduler(ctx, selection, POST_ALLOC);
+ for (auto &bb : *selection.blockList) {
+ const int32_t insnNum = scheduler.buildDAG(bb);
+ bb.insnList.clear();
+ scheduler.scheduleDAG(bb, insnNum);
+ }
+ }
+ }
+
+ void schedulePreRegAllocation(GenContext &ctx, Selection &selection) {
+ if (OCL_PRE_ALLOC_INSN_SCHEDULE) {
+ SelectionScheduler scheduler(ctx, selection, PRE_ALLOC);
+ for (auto &bb : *selection.blockList) {
+ const int32_t insnNum = scheduler.buildDAG(bb);
+ bb.insnList.clear();
+ scheduler.scheduleDAG(bb, insnNum);
+ }
+ }
+ }
+
+} /* namespace gbe */
+
diff --git a/backend/src/backend/gen_insn_scheduling.hpp b/backend/src/backend/gen_insn_scheduling.hpp
new file mode 100644
index 0000000..534557d
--- /dev/null
+++ b/backend/src/backend/gen_insn_scheduling.hpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file gen_insn_scheduling.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __GBE_GEN_INSN_SCHEDULING_HPP__
+#define __GBE_GEN_INSN_SCHEDULING_HPP__
+
+namespace gbe
+{
+ class Selection; // Pre ISA code
+ class GenContext; // Handle compilation for Gen
+
+ /*! Schedule the code per basic block (tends to limit register number) */
+ void schedulePreRegAllocation(GenContext &ctx, Selection &selection);
+
+ /*! Schedule the code per basic block (tends to deal with insn latency) */
+ void schedulePostRegAllocation(GenContext &ctx, Selection &selection);
+
+} /* namespace gbe */
+
+#endif /* __GBE_GEN_INSN_SCHEDULING_HPP__ */
+
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
new file mode 100644
index 0000000..08e80f4
--- /dev/null
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -0,0 +1,2049 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file gen_insn_selection.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/* This is the instruction selection code. First of all, this is a bunch of C++
+ * crap. Sorry if this is not that readable. Anyway, the goal here is to take
+ * GenIR code (i.e. the very regular, very RISC IR) and to produce GenISA with
+ * virtual registers (i.e. regular GenIR registers).
+ *
+ * Overall idea:
+ * =============
+ *
+ * There are a lot of papers and a lot of research about this, but I tried to keep
+ * it simple. No dynamic programming, nothing like that. Just a recursive maximal
+ * munch.
+ *
+ * Basically, the code is executed per basic block from bottom to top. Patterns
+ * of GenIR instructions are defined and each instruction is matched against the
+ * best pattern i.e. the pattern that catches the largest number of
+ * instructions. Once matched, a sequence of instructions is output.
+ *
+ * Each instruction the match depends on (but does not merge) is then marked as
+ * "root", i.e. we indicate that each of these instructions must be generated:
+ * we need their destinations for the instructions that consume them (remember
+ * that we generate the code in reverse order).
+ *
+ * Patterns:
+ * =========
+ *
+ * There are a lot of patterns and, obviously, I did not implement all of them. I
+ * just quickly put the framework together to make pattern implementation fairly
+ * easy. Adding a pattern is pretty verbose, but it should not be too hard
+ * to add new ones.
+ *
+ * To create and register patterns, I just abused C++ pre-main. A bunch of
+ * patterns is then created and sorted per opcode (i.e. the opcode of the root
+ * of the pattern): this creates a library of patterns that may be used at
+ * run time.
+ *
+ * Predication / Masking and CFG linearization
+ * ===========================================
+ *
+ * The current version is based on an unfortunate choice. Basically, the problem
+ * to solve is how to map unstructured branches (i.e. regular gotos) onto Gen.
+ * Gen has native support for structured branches (if/else/endif/while...) but
+ * nothing really native for unstructured branches.
+ *
+ * The idea we implemented is simple. We stole one flag register (here f0.0) to
+ * mask all the instructions (and only activate the proper SIMD lanes) and we
+ * use the CFG linearization technique to properly handle the control flow. This
+ * is not really good for one particular reason: Gen instructions must use the
+ * *same* flag register for the predicates (used for masking) and the
+ * conditional modifier (used as a destination for CMP). This leads to extra
+ * complications with compare instructions and select instructions. Basically,
+ * we need to insert extra MOVs.
+ *
+ * Also, there is some extra kludge to handle the predicates for JMPI.
+ *
+ * See TODO for a better idea for branching and masking
+ *
+ * TODO:
+ * =====
+ *
+ * Sadly, I recreated a new DAG class here. This is just a bad idea since we
+ * already have the DAG per basic block with the Function graph i.e. the
+ * complete graph of uses and definitions. I think we should be able to save a
+ * lot of code here if we can simply reuse the code from UD / DU chains.
+ *
+ * Finally, cross-block instruction selection is quite possible with this simple
+ * approach. Basically, instructions from dominating blocks could be merged and
+ * matched with other instructions in the dominated block. This leads to an
+ * interesting approach which consists of traversing the dominator tree in post
+ * order.
+ *
+ * About masking and branching, a much better idea (that I unfortunately found
+ * later) is to replace the use of the flag with uses of if/endif to enclose the
+ * basic block. So, instead of using predication, we use auto-masking. The very
+ * cool consequence is that we can reintegrate the structured branches.
+ * Basically, we will be able to identify branches that can be mapped to
+ * structured branches and nicely mix unstructured branches (which will use
+ * jmpi and if/endif to mask the blocks) and structured branches (which are
+ * pretty fast).
+ */
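+
+/* To give a flavor of what a pattern looks like, here is a hypothetical sketch
+ * (not one of the patterns registered below): a MUL feeding an ADD may be
+ * matched into a single MAD when the multiply can be merged.
+ *
+ *   class MulAddPattern : public SelectionPattern {
+ *   public:
+ *     MulAddPattern(void) : SelectionPattern(1, 1) {
+ *       this->opcodes.push_back(ir::OP_ADD);
+ *     }
+ *     virtual bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
+ *       SelectionDAG *mul = dag.child[0];
+ *       if (mul == NULL || mul->insn.getOpcode() != ir::OP_MUL)
+ *         return false;               // no match: try the next pattern
+ *       // ... emit one sel.MAD(...) here and mark the sources that were not
+ *       // merged as roots (see markAllChildren below)
+ *       return true;
+ *     }
+ *   };
+ *
+ * MulAddPattern and its body are illustrative only; the real patterns and the
+ * SelectionLibrary that registers them are defined further down in this file.
+ */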
+
+#include "backend/gen_insn_selection.hpp"
+#include "backend/gen_context.hpp"
+#include "ir/function.hpp"
+#include "ir/liveness.hpp"
+#include "ir/profile.hpp"
+#include "sys/cvar.hpp"
+#include "sys/vector.hpp"
+#include <algorithm>
+
+namespace gbe
+{
+
+ ///////////////////////////////////////////////////////////////////////////
+ // Helper functions
+ ///////////////////////////////////////////////////////////////////////////
+
+ uint32_t getGenType(ir::Type type) {
+ using namespace ir;
+ switch (type) {
+ case TYPE_BOOL: return GEN_TYPE_UW;
+ case TYPE_S8: return GEN_TYPE_B;
+ case TYPE_U8: return GEN_TYPE_UB;
+ case TYPE_S16: return GEN_TYPE_W;
+ case TYPE_U16: return GEN_TYPE_UW;
+ case TYPE_S32: return GEN_TYPE_D;
+ case TYPE_U32: return GEN_TYPE_UD;
+ case TYPE_FLOAT: return GEN_TYPE_F;
+ default: NOT_SUPPORTED; return GEN_TYPE_F;
+ }
+ }
+
+ uint32_t getGenCompare(ir::Opcode opcode) {
+ using namespace ir;
+ switch (opcode) {
+ case OP_LE: return GEN_CONDITIONAL_LE;
+ case OP_LT: return GEN_CONDITIONAL_L;
+ case OP_GE: return GEN_CONDITIONAL_GE;
+ case OP_GT: return GEN_CONDITIONAL_G;
+ case OP_EQ: return GEN_CONDITIONAL_EQ;
+ case OP_NE: return GEN_CONDITIONAL_NEQ;
+ default: NOT_SUPPORTED; return 0u;
+ };
+ }
+
+ ///////////////////////////////////////////////////////////////////////////
+ // SelectionInstruction
+ ///////////////////////////////////////////////////////////////////////////
+
+ SelectionInstruction::SelectionInstruction(SelectionOpcode op, uint32_t dst, uint32_t src) :
+ parent(NULL), opcode(op), dstNum(dst), srcNum(src)
+ {}
+
+ void SelectionInstruction::prepend(SelectionInstruction &other) {
+ gbe::prepend(&other, this);
+ other.parent = this->parent;
+ }
+
+ void SelectionInstruction::append(SelectionInstruction &other) {
+ gbe::append(&other, this);
+ other.parent = this->parent;
+ }
+
+ bool SelectionInstruction::isRead(void) const {
+ return this->opcode == SEL_OP_UNTYPED_READ ||
+ this->opcode == SEL_OP_BYTE_GATHER;
+ }
+
+ bool SelectionInstruction::isWrite(void) const {
+ return this->opcode == SEL_OP_UNTYPED_WRITE ||
+ this->opcode == SEL_OP_BYTE_SCATTER;
+ }
+
+ bool SelectionInstruction::isBranch(void) const {
+ return this->opcode == SEL_OP_JMPI;
+ }
+
+ bool SelectionInstruction::isLabel(void) const {
+ return this->opcode == SEL_OP_LABEL;
+ }
+
+ ///////////////////////////////////////////////////////////////////////////
+ // SelectionVector
+ ///////////////////////////////////////////////////////////////////////////
+
+ SelectionVector::SelectionVector(void) :
+ insn(NULL), reg(NULL), regNum(0), isSrc(0)
+ {}
+
+ ///////////////////////////////////////////////////////////////////////////
+ // SelectionBlock
+ ///////////////////////////////////////////////////////////////////////////
+
+ SelectionBlock::SelectionBlock(const ir::BasicBlock *bb) : bb(bb) {}
+
+ void SelectionBlock::append(ir::Register reg) { tmp.push_back(reg); }
+
+ void SelectionBlock::append(SelectionInstruction *insn) {
+ this->insnList.push_back(insn);
+ insn->parent = this;
+ }
+
+ void SelectionBlock::prepend(SelectionInstruction *insn) {
+ this->insnList.push_front(insn);
+ insn->parent = this;
+ }
+
+ void SelectionBlock::append(SelectionVector *vec) {
+ this->vectorList.push_back(vec);
+ }
+
+ ///////////////////////////////////////////////////////////////////////////
+ // Maximal munch selection on DAG
+ ///////////////////////////////////////////////////////////////////////////
+
+ /*! All instructions in a block are organized into a DAG */
+ class SelectionDAG
+ {
+ public:
+ INLINE SelectionDAG(const ir::Instruction &insn) :
+ insn(insn), mergeable(0), childNum(insn.getSrcNum()), isRoot(0) {
+ for (uint32_t childID = 0; childID < childNum; ++childID)
+ this->child[childID] = NULL;
+ }
+ /*! Mergeable are non-root instructions with valid sources */
+ INLINE void setAsMergeable(uint32_t which) { mergeable|=(1<<which); }
+ /*! Mergeable are non-root instructions with valid sources */
+ INLINE bool isMergeable(uint32_t which) const { return mergeable&(1<<which); }
+ /*! Children that need to be matched */
+ SelectionDAG *child[ir::Instruction::MAX_SRC_NUM];
+ /*! Instruction that needs to be matched */
+ const ir::Instruction &insn;
+ /*! When sources have been overwritten, a child insn cannot be merged */
+ uint32_t mergeable:ir::Instruction::MAX_SRC_NUM;
+ /*! Number of children we have in the pattern */
+ uint32_t childNum:4;
+ /*! A root must be generated, no matter what */
+ uint32_t isRoot:1;
+ };
+
+ /*! A pattern is a tree to match. This is the general interface for them. For
+ * pattern to be matched, we need to match the complete tree i.e. this node
+ * and its child nodes
+ */
+ class SelectionPattern
+ {
+ public:
+ SelectionPattern(uint32_t insnNum, uint32_t cost) :
+ insnNum(insnNum), cost(cost) {}
+ /*! This is an abstract class */
+ virtual ~SelectionPattern(void) {}
+ /*! Emit Gen code in the selection. Return false if no match */
+ virtual bool emit(Selection::Opaque &sel, SelectionDAG &dag) const = 0;
+ /*! All the possible opcodes for this pattern (for fast sort) */
+ vector<ir::Opcode> opcodes;
+ /*! Number of instruction generated */
+ uint32_t insnNum;
+ /*! Cost of the pattern */
+ uint32_t cost;
+ };
+
+ /*! Store and sort all the patterns. This is our global library we use for the
+ * code selection
+ */
+ class SelectionLibrary
+ {
+ public:
+ /*! Will register all the patterns */
+ SelectionLibrary(void);
+ /*! Release and destroy all the registered patterns */
+ ~SelectionLibrary(void);
+ /*! Insert the given pattern for all associated opcodes */
+ template <typename PatternType> void insert(void);
+ /*! One list of pattern per opcode */
+ typedef vector<const SelectionPattern*> PatternList;
+ /*! All lists of patterns properly sorted per opcode */
+ PatternList patterns[ir::OP_INVALID];
+ /*! All patterns to free */
+ vector<const SelectionPattern*> toFree;
+ };
+
+ ///////////////////////////////////////////////////////////////////////////
+ // Code selection internal implementation
+ ///////////////////////////////////////////////////////////////////////////
+
+ /*! Actual implementation of the instruction selection engine */
+ class Selection::Opaque
+ {
+ public:
+ /*! simdWidth is the default width for the instructions */
+ Opaque(GenContext &ctx);
+ /*! Release everything */
+ virtual ~Opaque(void);
+ /*! Implements the instruction selection itself */
+ void select(void);
+ /*! Start a backward generation (from the end of the block) */
+ void startBackwardGeneration(void);
+ /*! End backward code generation and output the code in the block */
+ void endBackwardGeneration(void);
+ /*! Implement public class */
+ uint32_t getLargestBlockSize(void) const;
+ /*! Implement public class */
+ INLINE uint32_t getVectorNum(void) const { return this->vectorNum; }
+ /*! Implement public class */
+ INLINE ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID);
+ /*! Implement public class */
+ INLINE ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID);
+ /*! Implement public class */
+ INLINE uint32_t getRegNum(void) const { return file.regNum(); }
+ /*! Implements public interface */
+ bool isScalarOrBool(ir::Register reg) const;
+ /*! Implements public interface */
+ INLINE ir::RegisterData getRegisterData(ir::Register reg) const {
+ return file.get(reg);
+ }
+ /*! Implement public class */
+ INLINE ir::RegisterFamily getRegisterFamily(ir::Register reg) const {
+ return file.get(reg).family;
+ }
+ /*! Implement public class */
+ SelectionInstruction *create(SelectionOpcode, uint32_t dstNum, uint32_t srcNum);
+ /*! Return the selection register from the GenIR one */
+ GenRegister selReg(ir::Register, ir::Type type = ir::TYPE_FLOAT) const;
+ /*! Compute the nth register part when using SIMD8 with Qn (n in 2,3,4) */
+ GenRegister selRegQn(ir::Register, uint32_t quarter, ir::Type type = ir::TYPE_FLOAT) const;
+ /*! Size of the stack (should be large enough) */
+ enum { MAX_STATE_NUM = 16 };
+ /*! Push the current instruction state */
+ INLINE void push(void) {
+ assert(stateNum < MAX_STATE_NUM);
+ stack[stateNum++] = curr;
+ }
+ /*! Pop the latest pushed state */
+ INLINE void pop(void) {
+ assert(stateNum > 0);
+ curr = stack[--stateNum];
+ }
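+ // Typical usage sketch for push() / pop() (illustrative only): temporarily
+ // override part of the instruction state, emit something, then restore it:
+ //   this->push();
+ //     this->curr.predicate = GEN_PREDICATE_NONE;
+ //     this->MOV(dst, src);
+ //   this->pop();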
+ /*! Create a new register in the register file and append it in the
+ * temporary list of the current block
+ */
+ INLINE ir::Register reg(ir::RegisterFamily family) {
+ GBE_ASSERT(block != NULL);
+ const ir::Register reg = file.append(family);
+ block->append(reg);
+ return reg;
+ }
+ /*! Append a block at the block stream tail. It becomes the current block */
+ void appendBlock(const ir::BasicBlock &bb);
+ /*! Append an instruction in the current block */
+ SelectionInstruction *appendInsn(SelectionOpcode, uint32_t dstNum, uint32_t srcNum);
+ /*! Append a new vector of registers in the current block */
+ SelectionVector *appendVector(void);
+ /*! Build a DAG for the basic block (return number of instructions) */
+ uint32_t buildBasicBlockDAG(const ir::BasicBlock &bb);
+ /*! Perform the selection on the basic block */
+ void matchBasicBlock(uint32_t insnNum);
+ /*! A root instruction needs to be generated */
+ bool isRoot(const ir::Instruction &insn) const;
+
+ /*! To handle selection block allocation */
+ DECL_POOL(SelectionBlock, blockPool);
+ /*! To handle selection instruction allocation */
+ LinearAllocator insnAllocator;
+ /*! To handle selection vector allocation */
+ DECL_POOL(SelectionVector, vecPool);
+ /*! Per register information used with top-down block sweeping */
+ vector<SelectionDAG*> regDAG;
+ /*! Store one DAG per instruction */
+ vector<SelectionDAG*> insnDAG;
+ /*! Owns this structure */
+ GenContext &ctx;
+ /*! Tail of the code fragment for backward code generation */
+ intrusive_list<SelectionInstruction> bwdList;
+ /*! List of emitted blocks */
+ intrusive_list<SelectionBlock> blockList;
+ /*! Currently processed block */
+ SelectionBlock *block;
+ /*! Current instruction state to use */
+ GenInstructionState curr;
+ /*! We append new registers so we duplicate the function register file */
+ ir::RegisterFile file;
+ /*! State used to encode the instructions */
+ GenInstructionState stack[MAX_STATE_NUM];
+ /*! Maximum number of instructions in the basic blocks */
+ uint32_t maxInsnNum;
+ /*! Speed up instruction dag allocation */
+ DECL_POOL(SelectionDAG, dagPool);
+ /*! Total number of registers in the function we encode */
+ uint32_t regNum;
+ /*! Number of states currently pushed */
+ uint32_t stateNum;
+ /*! Number of vector allocated */
+ uint32_t vectorNum;
+ /*! If true, generate code backward */
+ bool bwdCodeGeneration;
+ /*! To make function prototypes more readable */
+ typedef const GenRegister &Reg;
+
+#define ALU1(OP) \
+ INLINE void OP(Reg dst, Reg src) { ALU1(SEL_OP_##OP, dst, src); }
+#define ALU2(OP) \
+ INLINE void OP(Reg dst, Reg src0, Reg src1) { ALU2(SEL_OP_##OP, dst, src0, src1); }
+#define ALU3(OP) \
+ INLINE void OP(Reg dst, Reg src0, Reg src1, Reg src2) { ALU3(SEL_OP_##OP, dst, src0, src1, src2); }
+ ALU1(MOV)
+ ALU1(RNDZ)
+ ALU1(RNDE)
+ ALU2(SEL)
+ ALU1(NOT)
+ ALU2(AND)
+ ALU2(OR)
+ ALU2(XOR)
+ ALU2(SHR)
+ ALU2(SHL)
+ ALU2(RSR)
+ ALU2(RSL)
+ ALU2(ASR)
+ ALU2(ADD)
+ ALU2(MUL)
+ ALU1(FRC)
+ ALU1(RNDD)
+ ALU1(RNDU)
+ ALU2(MACH)
+ ALU1(LZD)
+ ALU3(MAD)
+#undef ALU1
+#undef ALU2
+#undef ALU3
+ /*! Encode a barrier instruction */
+ void BARRIER(GenRegister src);
+ /*! Encode a label instruction */
+ void LABEL(ir::LabelIndex label);
+ /*! Jump indexed instruction */
+ void JMPI(Reg src, ir::LabelIndex target);
+ /*! Compare instructions */
+ void CMP(uint32_t conditional, Reg src0, Reg src1);
+ /*! Select instruction with embedded comparison */
+ void SEL_CMP(uint32_t conditional, Reg dst, Reg src0, Reg src1);
+ /*! EOT is used to finish GPGPU threads */
+ void EOT(void);
+ /*! No-op */
+ void NOP(void);
+ /*! Wait instruction (used for the barrier) */
+ void WAIT(void);
+ /*! Untyped read (up to 4 elements) */
+ void UNTYPED_READ(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti);
+ /*! Untyped write (up to 4 elements) */
+ void UNTYPED_WRITE(Reg addr, const GenRegister *src, uint32_t elemNum, uint32_t bti);
+ /*! Byte gather (for unaligned bytes, shorts and ints) */
+ void BYTE_GATHER(Reg dst, Reg addr, uint32_t elemSize, uint32_t bti);
+ /*! Byte scatter (for unaligned bytes, shorts and ints) */
+ void BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, uint32_t bti);
+ /*! Extended math function (2 arguments) */
+ void MATH(Reg dst, uint32_t function, Reg src0, Reg src1);
+ /*! Extended math function (1 argument) */
+ void MATH(Reg dst, uint32_t function, Reg src);
+ /*! Encode unary instructions */
+ void ALU1(SelectionOpcode opcode, Reg dst, Reg src);
+ /*! Encode binary instructions */
+ void ALU2(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1);
+ /*! Encode ternary instructions */
+ void ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2);
+ /*! Use custom allocators */
+ GBE_CLASS(Opaque);
+ friend class SelectionBlock;
+ friend class SelectionInstruction;
+ };
+
+ ///////////////////////////////////////////////////////////////////////////
+ // Helper function
+ ///////////////////////////////////////////////////////////////////////////
+
+ /*! Directly mark all sources as root (when no match is found) */
+ static void markAllChildren(SelectionDAG &dag) {
+ // Do not merge anything, so all sources become roots
+ for (uint32_t childID = 0; childID < dag.childNum; ++childID)
+ if (dag.child[childID])
+ dag.child[childID]->isRoot = 1;
+ }
+
+ /*! Helper function to figure if two sources are the same */
+ static bool sourceMatch(SelectionDAG *src0DAG, uint32_t src0ID,
+ SelectionDAG *src1DAG, uint32_t src1ID)
+ {
+ GBE_ASSERT(src0DAG && src1DAG);
+ // Ensure they are the same physical registers
+ const ir::Register src0 = src0DAG->insn.getSrc(src0ID);
+ const ir::Register src1 = src1DAG->insn.getSrc(src1ID);
+ if (src0 != src1)
+ return false;
+ // Ensure they contain the same values
+ return src0DAG->child[src0ID] == src1DAG->child[src1ID];
+ }
+
+
+ Selection::Opaque::Opaque(GenContext &ctx) :
+ ctx(ctx), block(NULL),
+ curr(ctx.getSimdWidth()), file(ctx.getFunction().getRegisterFile()),
+ maxInsnNum(ctx.getFunction().getLargestBlockSize()), dagPool(maxInsnNum),
+ stateNum(0), vectorNum(0), bwdCodeGeneration(false)
+ {
+ const ir::Function &fn = ctx.getFunction();
+ this->regNum = fn.regNum();
+ this->regDAG.resize(regNum);
+ this->insnDAG.resize(maxInsnNum);
+ }
+
+ Selection::Opaque::~Opaque(void) {
+ for (auto it = blockList.begin(); it != blockList.end();) {
+ SelectionBlock &block = *it;
+ ++it;
+ this->deleteSelectionBlock(&block);
+ }
+ }
+
+ SelectionInstruction*
+ Selection::Opaque::create(SelectionOpcode opcode, uint32_t dstNum, uint32_t srcNum)
+ {
+ const size_t regSize = (dstNum+srcNum)*sizeof(GenRegister);
+ const size_t size = sizeof(SelectionInstruction) + regSize;
+ void *ptr = insnAllocator.allocate(size);
+ return new (ptr) SelectionInstruction(opcode, dstNum, srcNum);
+ }
+
+ void Selection::Opaque::startBackwardGeneration(void) {
+ this->bwdCodeGeneration = true;
+ }
+
+ void Selection::Opaque::endBackwardGeneration(void) {
+ for (auto it = bwdList.rbegin(); it != bwdList.rend();) {
+ SelectionInstruction &insn = *it;
+ auto toRemoveIt = it--;
+ bwdList.erase(toRemoveIt);
+ this->block->prepend(&insn);
+ }
+
+ this->bwdCodeGeneration = false;
+ }
+
+ uint32_t Selection::Opaque::getLargestBlockSize(void) const {
+ size_t maxInsnNum = 0;
+ for (const auto &bb : blockList)
+ maxInsnNum = std::max(maxInsnNum, bb.insnList.size());
+ return uint32_t(maxInsnNum);
+ }
+
+ void Selection::Opaque::appendBlock(const ir::BasicBlock &bb) {
+ this->block = this->newSelectionBlock(&bb);
+ this->blockList.push_back(this->block);
+ }
+
+ SelectionInstruction *Selection::Opaque::appendInsn(SelectionOpcode opcode,
+ uint32_t dstNum,
+ uint32_t srcNum)
+ {
+ GBE_ASSERT(this->block != NULL);
+ SelectionInstruction *insn = this->create(opcode, dstNum, srcNum);
+ if (this->bwdCodeGeneration)
+ this->bwdList.push_back(insn);
+ else
+ this->block->append(insn);
+ insn->state = this->curr;
+ return insn;
+ }
+
+ SelectionVector *Selection::Opaque::appendVector(void) {
+ GBE_ASSERT(this->block != NULL);
+ SelectionVector *vector = this->newSelectionVector();
+
+ if (this->bwdCodeGeneration)
+ vector->insn = this->bwdList.back();
+ else
+ vector->insn = this->block->insnList.back();
+ this->block->append(vector);
+ this->vectorNum++;
+ return vector;
+ }
+
+ ir::Register Selection::Opaque::replaceSrc(SelectionInstruction *insn, uint32_t regID) {
+ SelectionBlock *block = insn->parent;
+ const uint32_t simdWidth = ctx.getSimdWidth();
+ ir::Register tmp;
+
+ // This will append the temporary register in the instruction block
+ this->block = block;
+ tmp = this->reg(ir::FAMILY_DWORD);
+
+ // Generate the MOV instruction and replace the register in the instruction
+ SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1);
+ mov->src(0) = GenRegister::retype(insn->src(regID), GEN_TYPE_F);
+ mov->state = GenInstructionState(simdWidth);
+ insn->src(regID) = mov->dst(0) = GenRegister::fxgrf(simdWidth, tmp);
+ insn->prepend(*mov);
+
+ return tmp;
+ }
+
+ ir::Register Selection::Opaque::replaceDst(SelectionInstruction *insn, uint32_t regID) {
+ SelectionBlock *block = insn->parent;
+ const uint32_t simdWidth = ctx.getSimdWidth();
+ ir::Register tmp;
+
+ // This will append the temporary register in the instruction block
+ this->block = block;
+ tmp = this->reg(ir::FAMILY_DWORD);
+
+ // Generate the MOV instruction and replace the register in the instruction
+ SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1);
+ mov->dst(0) = GenRegister::retype(insn->dst(regID), GEN_TYPE_F);
+ mov->state = GenInstructionState(simdWidth);
+ insn->dst(regID) = mov->src(0) = GenRegister::fxgrf(simdWidth, tmp);
+ insn->append(*mov);
+
+ return tmp;
+ }
+
+ bool Selection::Opaque::isScalarOrBool(ir::Register reg) const {
+ if (ctx.isScalarReg(reg))
+ return true;
+ else {
+ const ir::RegisterFamily family = file.get(reg).family;
+ return family == ir::FAMILY_BOOL;
+ }
+ }
+
+#define SEL_REG(SIMD16, SIMD8, SIMD1) \
+ if (ctx.sel->isScalarOrBool(reg) == true) \
+ return GenRegister::retype(GenRegister::SIMD1(reg), genType); \
+ else if (simdWidth == 8) \
+ return GenRegister::retype(GenRegister::SIMD8(reg), genType); \
+ else { \
+ GBE_ASSERT (simdWidth == 16); \
+ return GenRegister::retype(GenRegister::SIMD16(reg), genType); \
+ }
+
+ GenRegister Selection::Opaque::selReg(ir::Register reg, ir::Type type) const {
+ using namespace ir;
+ const uint32_t genType = getGenType(type);
+ const uint32_t simdWidth = ctx.getSimdWidth();
+ const RegisterData data = file.get(reg);
+ const RegisterFamily family = data.family;
+ switch (family) {
+ case FAMILY_BOOL: SEL_REG(uw1grf, uw1grf, uw1grf); break;
+ case FAMILY_WORD: SEL_REG(uw16grf, uw8grf, uw1grf); break;
+ case FAMILY_BYTE: SEL_REG(ub16grf, ub8grf, ub1grf); break;
+ case FAMILY_DWORD: SEL_REG(f16grf, f8grf, f1grf); break;
+ default: NOT_SUPPORTED;
+ }
+ GBE_ASSERT(false);
+ return GenRegister();
+ }
+
+#undef SEL_REG
+
+ GenRegister Selection::Opaque::selRegQn(ir::Register reg, uint32_t q, ir::Type type) const {
+ GenRegister sreg = this->selReg(reg, type);
+ sreg.quarter = q;
+ return sreg;
+ }
+
+ /*! Syntactic sugar for method declaration */
+ typedef const GenRegister &Reg;
+
+ void Selection::Opaque::LABEL(ir::LabelIndex index) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_LABEL, 0, 0);
+ insn->index = uint16_t(index);
+ }
+
+ void Selection::Opaque::BARRIER(GenRegister src) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_BARRIER, 0, 1);
+ insn->src(0) = src;
+ }
+
+ void Selection::Opaque::JMPI(Reg src, ir::LabelIndex index) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_JMPI, 0, 1);
+ insn->src(0) = src;
+ insn->index = uint16_t(index);
+ }
+
+ void Selection::Opaque::CMP(uint32_t conditional, Reg src0, Reg src1) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_CMP, 0, 2);
+ insn->src(0) = src0;
+ insn->src(1) = src1;
+ insn->extra.function = conditional;
+ }
+
+ void Selection::Opaque::SEL_CMP(uint32_t conditional, Reg dst, Reg src0, Reg src1) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_SEL_CMP, 1, 2);
+ insn->dst(0) = dst;
+ insn->src(0) = src0;
+ insn->src(1) = src1;
+ insn->extra.function = conditional;
+ }
+
+ void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); }
+ void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); }
+ void Selection::Opaque::WAIT(void) { this->appendInsn(SEL_OP_WAIT, 0, 0); }
+
+ void Selection::Opaque::UNTYPED_READ(Reg addr,
+ const GenRegister *dst,
+ uint32_t elemNum,
+ uint32_t bti)
+ {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_UNTYPED_READ, elemNum, 1);
+ SelectionVector *srcVector = this->appendVector();
+ SelectionVector *dstVector = this->appendVector();
+
+ // Regular instruction to encode
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+ insn->dst(elemID) = dst[elemID];
+ insn->src(0) = addr;
+ insn->extra.function = bti;
+ insn->extra.elem = elemNum;
+
+ // Sends require contiguous allocation
+ dstVector->regNum = elemNum;
+ dstVector->isSrc = 0;
+ dstVector->reg = &insn->dst(0);
+
+ // Source cannot be scalar (yet)
+ srcVector->regNum = 1;
+ srcVector->isSrc = 1;
+ srcVector->reg = &insn->src(0);
+ }
+
+ void Selection::Opaque::UNTYPED_WRITE(Reg addr,
+ const GenRegister *src,
+ uint32_t elemNum,
+ uint32_t bti)
+ {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_UNTYPED_WRITE, 0, elemNum+1);
+ SelectionVector *vector = this->appendVector();
+
+ // Regular instruction to encode
+ insn->src(0) = addr;
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+ insn->src(elemID+1) = src[elemID];
+ insn->extra.function = bti;
+ insn->extra.elem = elemNum;
+
+ // Sends require contiguous allocation for the sources
+ vector->regNum = elemNum+1;
+ vector->reg = &insn->src(0);
+ vector->isSrc = 1;
+ }
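+
+ // Example (sketch): an untyped write of two dword values v0,v1 at address a
+ // produces one SEL_OP_UNTYPED_WRITE with sources {a, v0, v1} and a
+ // SelectionVector with regNum == 3 over src(0..2), which forces the register
+ // allocator to place the address and the payload in contiguous GRFs as the
+ // send message expects.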
+
+ void Selection::Opaque::BYTE_GATHER(Reg dst, Reg addr, uint32_t elemSize, uint32_t bti) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_BYTE_GATHER, 1, 1);
+ SelectionVector *srcVector = this->appendVector();
+ SelectionVector *dstVector = this->appendVector();
+
+ // Instruction to encode
+ insn->src(0) = addr;
+ insn->dst(0) = dst;
+ insn->extra.function = bti;
+ insn->extra.elem = elemSize;
+
+ // Byte gathers require a vector in the sense that scalars are not
+ // allowed (yet)
+ dstVector->regNum = 1;
+ dstVector->isSrc = 0;
+ dstVector->reg = &insn->dst(0);
+ srcVector->regNum = 1;
+ srcVector->isSrc = 1;
+ srcVector->reg = &insn->src(0);
+ }
+
+ void Selection::Opaque::BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, uint32_t bti) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_BYTE_SCATTER, 0, 2);
+ SelectionVector *vector = this->appendVector();
+
+ // Instruction to encode
+ insn->src(0) = addr;
+ insn->src(1) = src;
+ insn->extra.function = bti;
+ insn->extra.elem = elemSize;
+
+ // value and address are contiguous in the send
+ vector->regNum = 2;
+ vector->isSrc = 1;
+ vector->reg = &insn->src(0);
+ }
+
+ void Selection::Opaque::MATH(Reg dst, uint32_t function, Reg src0, Reg src1) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_MATH, 1, 2);
+ insn->dst(0) = dst;
+ insn->src(0) = src0;
+ insn->src(1) = src1;
+ insn->extra.function = function;
+ }
+
+ void Selection::Opaque::MATH(Reg dst, uint32_t function, Reg src) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_MATH, 1, 1);
+ insn->dst(0) = dst;
+ insn->src(0) = src;
+ insn->extra.function = function;
+ }
+
+ void Selection::Opaque::ALU1(SelectionOpcode opcode, Reg dst, Reg src) {
+ SelectionInstruction *insn = this->appendInsn(opcode, 1, 1);
+ insn->dst(0) = dst;
+ insn->src(0) = src;
+ }
+
+ void Selection::Opaque::ALU2(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1) {
+ SelectionInstruction *insn = this->appendInsn(opcode, 1, 2);
+ insn->dst(0) = dst;
+ insn->src(0) = src0;
+ insn->src(1) = src1;
+ }
+
+ void Selection::Opaque::ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2) {
+ SelectionInstruction *insn = this->appendInsn(opcode, 1, 3);
+ insn->dst(0) = dst;
+ insn->src(0) = src0;
+ insn->src(1) = src1;
+ insn->src(2) = src2;
+ }
+
+ // Boilerplate to initialize the selection library at C++ static initialization time (pre-main)
+ static SelectionLibrary *selLib = NULL;
+ static void destroySelectionLibrary(void) { GBE_DELETE(selLib); }
+ static struct SelectionLibraryInitializer {
+ SelectionLibraryInitializer(void) {
+ selLib = GBE_NEW_NO_ARG(SelectionLibrary);
+ atexit(destroySelectionLibrary);
+ }
+ } selectionLibraryInitializer;
+
+ bool Selection::Opaque::isRoot(const ir::Instruction &insn) const {
+ if (insn.getDstNum() > 1 ||
+ insn.hasSideEffect() ||
+ insn.isMemberOf<ir::BranchInstruction>() ||
+ insn.isMemberOf<ir::LabelInstruction>())
+ return true;
+
+ // No side effect, not a branch and no destination? Impossible
+ GBE_ASSERT(insn.getDstNum() == 1);
+
+ // Root if alive outside the block.
+ // XXX we should use Value and not registers in liveness info
+ const ir::BasicBlock *insnBlock = insn.getParent();
+ const ir::Liveness &liveness = this->ctx.getLiveness();
+ const ir::Liveness::LiveOut &liveOut = liveness.getLiveOut(insnBlock);
+ const ir::Register reg = insn.getDst(0);
+ if (liveOut.contains(reg))
+ return true;
+
+ // The instruction is only used in the current basic block
+ return false;
+ }
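+
+ // Example (sketch): for
+ // t = ADD a b
+ // STORE p t
+ // the store is a root (it has a side effect) while the add is a root only
+ // if t is also live outside the basic block; otherwise it is emitted only
+ // when a covering pattern marks it as needed (markAllChildren) or merges it.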
+
+ uint32_t Selection::Opaque::buildBasicBlockDAG(const ir::BasicBlock &bb)
+ {
+ using namespace ir;
+
+ // Clear all registers
+ for (uint32_t regID = 0; regID < this->regNum; ++regID)
+ this->regDAG[regID] = NULL;
+
+ // Build the DAG on the fly
+ uint32_t insnNum = 0;
+ const_cast<BasicBlock&>(bb).foreach([&](const Instruction &insn) {
+
+ // Build a SelectionDAG node for this instruction
+ SelectionDAG *dag = this->newSelectionDAG(insn);
+
+ // Point to non-root children
+ const uint32_t srcNum = insn.getSrcNum();
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+ const ir::Register reg = insn.getSrc(srcID);
+ SelectionDAG *child = this->regDAG[reg];
+ if (child) {
+ const ir::Instruction &childInsn = child->insn;
+ const uint32_t childSrcNum = childInsn.getSrcNum();
+
+ // We can merge a child only if its sources are still valid
+ bool mergeable = true;
+ for (uint32_t otherID = 0; otherID < childSrcNum; ++otherID) {
+ const SelectionDAG *srcDAG = child->child[otherID];
+ const ir::Register srcReg = childInsn.getSrc(otherID);
+ SelectionDAG *currDAG = this->regDAG[srcReg];
+ if (srcDAG != currDAG) {
+ mergeable = false;
+ break;
+ }
+ }
+ if (mergeable) dag->setAsMergeable(srcID);
+ dag->child[srcID] = child;
+ } else
+ dag->child[srcID] = NULL;
+ }
+
+ // Make it a root if we must
+ if (this->isRoot(insn)) dag->isRoot = 1;
+
+ // Save the DAG <-> instruction mapping
+ this->insnDAG[insnNum++] = dag;
+
+ // Associate all output registers to this instruction
+ const uint32_t dstNum = insn.getDstNum();
+ for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
+ const ir::Register reg = insn.getDst(dstID);
+ this->regDAG[reg] = dag;
+ }
+ });
+
+ return insnNum;
+ }
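+
+ // Example (sketch of the DAG built above): for
+ // t = MUL b c
+ // d = ADD t e
+ // the MUL node becomes the child of the ADD node through register t, and it
+ // is flagged as mergeable only if none of its own sources (b, c) has been
+ // redefined in between, i.e. regDAG[] still points to the producers the MUL
+ // actually read. Mergeable children are what lets a pattern such as MAD
+ // cover both instructions at once.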
+
+ void Selection::Opaque::matchBasicBlock(uint32_t insnNum)
+ {
+ // Bottom up code generation
+ for (int32_t insnID = insnNum-1; insnID >= 0; --insnID) {
+ // Process all possible patterns for this instruction
+ SelectionDAG &dag = *insnDAG[insnID];
+ if (dag.isRoot) {
+ const ir::Instruction &insn = dag.insn;
+ const ir::Opcode opcode = insn.getOpcode();
+ auto it = selLib->patterns[opcode].begin();
+ const auto end = selLib->patterns[opcode].end();
+
+ // Start a new code fragment
+ this->startBackwardGeneration();
+
+ // Try all the patterns from best to worst
+ do {
+ if ((*it)->emit(*this, dag))
+ break;
+ ++it;
+ } while (it != end);
+ GBE_ASSERT(it != end);
+
+ // Output the code in the current basic block
+ this->endBackwardGeneration();
+ }
+ }
+ }
+
+ void Selection::Opaque::select(void)
+ {
+ using namespace ir;
+ const Function &fn = ctx.getFunction();
+
+ // Perform the selection per basic block
+ fn.foreachBlock([&](const BasicBlock &bb) {
+ this->dagPool.rewind();
+ this->appendBlock(bb);
+ const uint32_t insnNum = this->buildBasicBlockDAG(bb);
+ this->matchBasicBlock(insnNum);
+ });
+ }
+
+ ///////////////////////////////////////////////////////////////////////////
+ // Code selection public implementation
+ ///////////////////////////////////////////////////////////////////////////
+
+ Selection::Selection(GenContext &ctx) {
+ this->blockList = NULL;
+ this->opaque = GBE_NEW(Selection::Opaque, ctx);
+ }
+
+ Selection::~Selection(void) { GBE_DELETE(this->opaque); }
+
+ void Selection::select(void) {
+ this->opaque->select();
+ this->blockList = &this->opaque->blockList;
+ }
+
+ bool Selection::isScalarOrBool(ir::Register reg) const {
+ return this->opaque->isScalarOrBool(reg);
+ }
+
+ uint32_t Selection::getLargestBlockSize(void) const {
+ return this->opaque->getLargestBlockSize();
+ }
+
+ uint32_t Selection::getVectorNum(void) const {
+ return this->opaque->getVectorNum();
+ }
+
+ uint32_t Selection::getRegNum(void) const {
+ return this->opaque->getRegNum();
+ }
+
+ ir::RegisterFamily Selection::getRegisterFamily(ir::Register reg) const {
+ return this->opaque->getRegisterFamily(reg);
+ }
+
+ ir::RegisterData Selection::getRegisterData(ir::Register reg) const {
+ return this->opaque->getRegisterData(reg);
+ }
+
+ ir::Register Selection::replaceSrc(SelectionInstruction *insn, uint32_t regID) {
+ return this->opaque->replaceSrc(insn, regID);
+ }
+
+ ir::Register Selection::replaceDst(SelectionInstruction *insn, uint32_t regID) {
+ return this->opaque->replaceDst(insn, regID);
+ }
+
+ SelectionInstruction *Selection::create(SelectionOpcode opcode, uint32_t dstNum, uint32_t srcNum) {
+ return this->opaque->create(opcode, dstNum, srcNum);
+ }
+
+ ///////////////////////////////////////////////////////////////////////////
+ // Implementation of all patterns
+ ///////////////////////////////////////////////////////////////////////////
+
+ GenRegister getRegisterFromImmediate(ir::Immediate imm)
+ {
+ using namespace ir;
+ switch (imm.type) {
+ case TYPE_U32: return GenRegister::immud(imm.data.u32);
+ case TYPE_S32: return GenRegister::immd(imm.data.s32);
+ case TYPE_FLOAT: return GenRegister::immf(imm.data.f32);
+ case TYPE_U16: return GenRegister::immuw(imm.data.u16);
+ case TYPE_S16: return GenRegister::immw(imm.data.s16);
+ case TYPE_U8: return GenRegister::immuw(imm.data.u8);
+ case TYPE_S8: return GenRegister::immw(imm.data.s8);
+ default: NOT_SUPPORTED; return GenRegister::immuw(0);
+ }
+ }
+
+ /*! Template for the one-to-many instruction patterns */
+ template <typename T, typename U>
+ class OneToManyPattern : public SelectionPattern
+ {
+ public:
+ /*! Register the pattern for all opcodes of the family */
+ OneToManyPattern(uint32_t insnNum, uint32_t cost) :
+ SelectionPattern(insnNum, cost)
+ {
+ for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
+ if (ir::isOpcodeFrom<U>(ir::Opcode(op)) == true)
+ this->opcodes.push_back(ir::Opcode(op));
+ }
+ /*! Call the child method with the proper prototype */
+ virtual bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
+ if (static_cast<const T*>(this)->emitOne(sel, ir::cast<U>(dag.insn))) {
+ markAllChildren(dag);
+ return true;
+ }
+ return false;
+ }
+ };
+
+/*! Declare a naive one-to-many pattern */
+#define DECL_PATTERN(FAMILY) \
+ struct FAMILY##Pattern : public OneToManyPattern<FAMILY##Pattern, ir::FAMILY>
+
+#define DECL_CTOR(FAMILY, INSN_NUM, COST) \
+ FAMILY##Pattern(void) : OneToManyPattern<FAMILY##Pattern, ir::FAMILY>(INSN_NUM, COST) {}
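+
+/* For reference, DECL_PATTERN(UnaryInstruction) below expands (roughly) to
+ * struct UnaryInstructionPattern :
+ * public OneToManyPattern<UnaryInstructionPattern, ir::UnaryInstruction>
+ * and DECL_CTOR(UnaryInstruction, 1, 1) supplies the constructor, which
+ * registers the pattern for every opcode of that instruction family (see the
+ * OneToManyPattern constructor above).
+ */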
+
+ /*! Unary instruction patterns */
+ DECL_PATTERN(UnaryInstruction)
+ {
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::UnaryInstruction &insn) const {
+ const ir::Opcode opcode = insn.getOpcode();
+ const GenRegister dst = sel.selReg(insn.getDst(0));
+ const GenRegister src = sel.selReg(insn.getSrc(0));
+ switch (opcode) {
+ case ir::OP_ABS: sel.MOV(dst, GenRegister::abs(src)); break;
+ case ir::OP_MOV: sel.MOV(dst, src); break;
+ case ir::OP_RNDD: sel.RNDD(dst, src); break;
+ case ir::OP_RNDE: sel.RNDE(dst, src); break;
+ case ir::OP_RNDU: sel.RNDU(dst, src); break;
+ case ir::OP_RNDZ: sel.RNDZ(dst, src); break;
+ case ir::OP_COS: sel.MATH(dst, GEN_MATH_FUNCTION_COS, src); break;
+ case ir::OP_SIN: sel.MATH(dst, GEN_MATH_FUNCTION_SIN, src); break;
+ case ir::OP_LOG: sel.MATH(dst, GEN_MATH_FUNCTION_LOG, src); break;
+ case ir::OP_SQR: sel.MATH(dst, GEN_MATH_FUNCTION_SQRT, src); break;
+ case ir::OP_RSQ: sel.MATH(dst, GEN_MATH_FUNCTION_RSQ, src); break;
+ case ir::OP_RCP: sel.MATH(dst, GEN_MATH_FUNCTION_INV, src); break;
+ default: NOT_SUPPORTED;
+ }
+ return true;
+ }
+ DECL_CTOR(UnaryInstruction, 1, 1)
+ };
+
+ BVAR(OCL_OPTIMIZE_IMMEDIATE, true);
+
+ /*! Binary regular instruction pattern */
+ class BinaryInstructionPattern : public SelectionPattern
+ {
+ public:
+ BinaryInstructionPattern(void) : SelectionPattern(1,1) {
+ for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
+ if (ir::isOpcodeFrom<ir::BinaryInstruction>(ir::Opcode(op)) == true)
+ this->opcodes.push_back(ir::Opcode(op));
+ }
+
+ INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const
+ {
+ using namespace ir;
+ const ir::BinaryInstruction &insn = cast<BinaryInstruction>(dag.insn);
+ const Opcode opcode = insn.getOpcode();
+ const Type type = insn.getType();
+ GenRegister dst = sel.selReg(insn.getDst(0), type);
+
+ // Immediates not supported
+ if (opcode == OP_DIV || opcode == OP_POW) {
+ GBE_ASSERT(type == TYPE_FLOAT);
+ const GenRegister src0 = sel.selReg(insn.getSrc(0), type);
+ const GenRegister src1 = sel.selReg(insn.getSrc(1), type);
+ const uint32_t mathOp = opcode == OP_DIV ?
+ GEN_MATH_FUNCTION_FDIV :
+ GEN_MATH_FUNCTION_POW;
+ sel.MATH(dst, mathOp, src0, src1);
+ markAllChildren(dag);
+ return true;
+ }
+
+ sel.push();
+
+ // Boolean values use scalars
+ if (sel.isScalarOrBool(insn.getDst(0)) == true) {
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ }
+
+ // Look for immediate values
+ GenRegister src0, src1;
+ SelectionDAG *dag0 = dag.child[0];
+ SelectionDAG *dag1 = dag.child[1];
+
+ // Right source can always be an immediate
+ if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI) {
+ const auto &childInsn = cast<LoadImmInstruction>(dag1->insn);
+ src0 = sel.selReg(insn.getSrc(0), type);
+ src1 = getRegisterFromImmediate(childInsn.getImmediate());
+ if (dag0) dag0->isRoot = 1;
+ }
+ // The left source cannot be an immediate, but that is fine if the operation commutes
+ else if (OCL_OPTIMIZE_IMMEDIATE && dag0 != NULL && insn.commutes() && dag0->insn.getOpcode() == OP_LOADI) {
+ const auto &childInsn = cast<LoadImmInstruction>(dag0->insn);
+ src0 = sel.selReg(insn.getSrc(1), type);
+ src1 = getRegisterFromImmediate(childInsn.getImmediate());
+ if (dag1) dag1->isRoot = 1;
+ }
+ // Just grab the two sources
+ else {
+ src0 = sel.selReg(insn.getSrc(0), type);
+ src1 = sel.selReg(insn.getSrc(1), type);
+ markAllChildren(dag);
+ }
+
+ // Output the binary instruction
+ switch (opcode) {
+ case OP_ADD: sel.ADD(dst, src0, src1); break;
+ case OP_XOR: sel.XOR(dst, src0, src1); break;
+ case OP_OR: sel.OR(dst, src0, src1); break;
+ case OP_AND: sel.AND(dst, src0, src1); break;
+ case OP_SUB: sel.ADD(dst, src0, GenRegister::negate(src1)); break;
+ case OP_SHL: sel.SHL(dst, src0, src1); break;
+ case OP_SHR: sel.SHR(dst, src0, src1); break;
+ case OP_ASR: sel.ASR(dst, src0, src1); break;
+ case OP_MUL:
+ if (type == TYPE_FLOAT)
+ sel.MUL(dst, src0, src1);
+ else if (type == TYPE_U32 || type == TYPE_S32) {
+ sel.pop();
+ return false;
+ }
+ else
+ NOT_IMPLEMENTED;
+ break;
+ default: NOT_IMPLEMENTED;
+ }
+ sel.pop();
+ return true;
+ }
+ };
+
+ /*! MAD pattern */
+ class MulAddInstructionPattern : public SelectionPattern
+ {
+ public:
+ /*! Register the pattern for all opcodes of the family */
+ MulAddInstructionPattern(void) : SelectionPattern(2, 1) {
+ this->opcodes.push_back(ir::OP_ADD);
+ }
+
+ /*! Implements base class */
+ virtual bool emit(Selection::Opaque &sel, SelectionDAG &dag) const
+ {
+ using namespace ir;
+
+ // MADs tend to increase the liveness of their sources (since there are
+ // three of them). TODO: refine this strategy. At the very least we should
+ // be able to evaluate per-basic-block register pressure and selectively
+ // enable or disable MADs
+ if (sel.ctx.limitRegisterPressure)
+ return false;
+
+ // We are good to try. We need a MUL for one of the two sources
+ const ir::BinaryInstruction &insn = cast<ir::BinaryInstruction>(dag.insn);
+ if (insn.getType() != TYPE_FLOAT)
+ return false;
+ SelectionDAG *child0 = dag.child[0];
+ SelectionDAG *child1 = dag.child[1];
+ const GenRegister dst = sel.selReg(insn.getDst(0), TYPE_FLOAT);
+ if (child0 && child0->insn.getOpcode() == OP_MUL) {
+ GBE_ASSERT(cast<ir::BinaryInstruction>(child0->insn).getType() == TYPE_FLOAT);
+ const GenRegister src0 = sel.selReg(child0->insn.getSrc(0), TYPE_FLOAT);
+ const GenRegister src1 = sel.selReg(child0->insn.getSrc(1), TYPE_FLOAT);
+ const GenRegister src2 = sel.selReg(insn.getSrc(1), TYPE_FLOAT);
+ sel.MAD(dst, src2, src0, src1); // order different on HW!
+ if (child0->child[0]) child0->child[0]->isRoot = 1;
+ if (child0->child[1]) child0->child[1]->isRoot = 1;
+ if (child1) child1->isRoot = 1;
+ return true;
+ }
+ if (child1 && child1->insn.getOpcode() == OP_MUL) {
+ GBE_ASSERT(cast<ir::BinaryInstruction>(child1->insn).getType() == TYPE_FLOAT);
+ const GenRegister src0 = sel.selReg(child1->insn.getSrc(0), TYPE_FLOAT);
+ const GenRegister src1 = sel.selReg(child1->insn.getSrc(1), TYPE_FLOAT);
+ const GenRegister src2 = sel.selReg(insn.getSrc(0), TYPE_FLOAT);
+ sel.MAD(dst, src2, src0, src1); // order different on HW!
+ if (child1->child[0]) child1->child[0]->isRoot = 1;
+ if (child1->child[1]) child1->child[1]->isRoot = 1;
+ if (child0) child0->isRoot = 1;
+ return true;
+ }
+ return false;
+ }
+ };
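+
+ // Sketch: the pattern above turns
+ // t = MUL a b
+ // d = ADD t c (or ADD c t)
+ // into a single sel.MAD(dst, c, a, b) when everything is float and register
+ // pressure limiting is off; note that the addend comes first, hence the
+ // "order different on HW" comments in the emit code.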
+
+ /*! sel.{le,l,ge...} like patterns */
+ class SelectModifierInstructionPattern : public SelectionPattern
+ {
+ public:
+ /*! Register the pattern for all opcodes of the family */
+ SelectModifierInstructionPattern(void) : SelectionPattern(2, 1) {
+ this->opcodes.push_back(ir::OP_SEL);
+ }
+
+ /*! Implements base class */
+ virtual bool emit(Selection::Opaque &sel, SelectionDAG &dag) const
+ {
+ using namespace ir;
+ SelectionDAG *cmp = dag.child[0];
+ const SelectInstruction &insn = cast<SelectInstruction>(dag.insn);
+
+ // Not in this block
+ if (cmp == NULL) return false;
+
+ // We need to match a compare
+ if (cmp->insn.isMemberOf<CompareInstruction>() == false) return false;
+
+ // We look for something like this:
+ // cmp.{le,ge,...} flag src0 src1
+ // sel dst flag src0 src1
+ // so both sources must match
+ if (sourceMatch(cmp, 0, &dag, 1) == false) return false;
+ if (sourceMatch(cmp, 1, &dag, 2) == false) return false;
+
+ // OK, we merge the instructions
+ const ir::CompareInstruction &cmpInsn = cast<CompareInstruction>(cmp->insn);
+ const ir::Opcode opcode = cmpInsn.getOpcode();
+ const uint32_t genCmp = getGenCompare(opcode);
+
+ // Like for regular selects, we need a temporary since we cannot predicate
+ // properly
+ const ir::Type type = cmpInsn.getType();
+ const RegisterFamily family = getFamily(type);
+ const GenRegister tmp = sel.selReg(sel.reg(family), type);
+ const uint32_t simdWidth = sel.curr.execWidth;
+ const GenRegister dst = sel.selReg(insn.getDst(0), type);
+ const GenRegister src0 = sel.selReg(cmpInsn.getSrc(0), type);
+ const GenRegister src1 = sel.selReg(cmpInsn.getSrc(1), type);
+
+ sel.push();
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.execWidth = simdWidth;
+ sel.curr.physicalFlag = 0;
+ sel.SEL_CMP(genCmp, tmp, src0, src1);
+ sel.pop();
+
+ // Update the destination register properly now
+ sel.MOV(dst, tmp);
+
+ // We need the sources of the compare instruction
+ markAllChildren(*cmp);
+
+ return true;
+ }
+ };
+
+ /*! A 32x32-bit integer multiply needs more instructions */
+ class Int32x32MulInstructionPattern : public SelectionPattern
+ {
+ public:
+ /*! Register the pattern for all opcodes of the family */
+ Int32x32MulInstructionPattern(void) : SelectionPattern(1, 4) {
+ this->opcodes.push_back(ir::OP_MUL);
+ }
+
+ /*! Implements base class */
+ virtual bool emit(Selection::Opaque &sel, SelectionDAG &dag) const
+ {
+ using namespace ir;
+ const ir::BinaryInstruction &insn = cast<ir::BinaryInstruction>(dag.insn);
+ const uint32_t simdWidth = sel.curr.execWidth;
+ const Type type = insn.getType();
+ if (type == TYPE_U32 || type == TYPE_S32) {
+ GenRegister dst = sel.selReg(insn.getDst(0), type);
+ GenRegister src0 = sel.selReg(insn.getSrc(0), type);
+ GenRegister src1 = sel.selReg(insn.getSrc(1), type);
+
+ sel.push();
+
+ // Either the left half of the 16-wide register or just a SIMD8 register
+ dst = GenRegister::retype(dst, GEN_TYPE_D);
+ src0 = GenRegister::retype(src0, GEN_TYPE_D);
+ src1 = GenRegister::retype(src1, GEN_TYPE_D);
+ sel.curr.execWidth = 8;
+ sel.curr.quarterControl = GEN_COMPRESSION_Q1;
+ sel.MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), src0, src1);
+ sel.curr.accWrEnable = 1;
+ sel.MACH(GenRegister::retype(GenRegister::null(), GEN_TYPE_D), src0, src1);
+ sel.curr.accWrEnable = 0;
+ sel.MOV(GenRegister::retype(dst, GEN_TYPE_F), GenRegister::acc());
+
+ // Right half of the 16-wide register now
+ if (simdWidth == 16) {
+ sel.curr.noMask = 1;
+ const GenRegister nextSrc0 = sel.selRegQn(insn.getSrc(0), 1, TYPE_S32);
+ const GenRegister nextSrc1 = sel.selRegQn(insn.getSrc(1), 1, TYPE_S32);
+ sel.MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), nextSrc0, nextSrc1);
+ sel.curr.accWrEnable = 1;
+ sel.MACH(GenRegister::retype(GenRegister::null(), GEN_TYPE_D), nextSrc0, nextSrc1);
+ sel.curr.accWrEnable = 0;
+ sel.curr.quarterControl = GEN_COMPRESSION_Q2;
+ const ir::Register reg = sel.reg(FAMILY_DWORD);
+ sel.MOV(GenRegister::f8grf(reg), GenRegister::acc());
+ sel.curr.noMask = 0;
+ sel.MOV(GenRegister::retype(GenRegister::next(dst), GEN_TYPE_F),
+ GenRegister::f8grf(reg));
+ }
+
+ sel.pop();
+
+ // All children are marked as root
+ markAllChildren(dag);
+ return true;
+ } else
+ return false;
+ }
+ };
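+
+ // Sketch of the code emitted above for one SIMD8 quarter of a 32x32-bit
+ // integer multiply (the SIMD16 path repeats it with quarterControl Q2):
+ // MUL acc, src0, src1
+ // MACH null, src0, src1 (with accWrEnable set)
+ // MOV dst, acc
+ // The execution width is forced to 8, which is why the SIMD16 case is
+ // handled as two quarters.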
+
+ /*! A 32x16-bit integer multiply can be done in one instruction */
+ class Int32x16MulInstructionPattern : public SelectionPattern
+ {
+ public:
+ /*! Register the pattern for all opcodes of the family */
+ Int32x16MulInstructionPattern(void) : SelectionPattern(1, 1) {
+ this->opcodes.push_back(ir::OP_MUL);
+ }
+
+ bool is16BitSpecialReg(ir::Register reg) const {
+ if (reg == ir::ocl::lid0 ||
+ reg == ir::ocl::lid1 ||
+ reg == ir::ocl::lid2 ||
+ reg == ir::ocl::lsize0 ||
+ reg == ir::ocl::lsize1 ||
+ reg == ir::ocl::lsize2)
+ return true;
+ else
+ return false;
+ }
+
+ /*! Try to emit a multiply where child childID is a 16-bit immediate */
+ bool emitMulImmediate(Selection::Opaque &sel, SelectionDAG &dag, uint32_t childID) const {
+ using namespace ir;
+ const ir::BinaryInstruction &insn = cast<ir::BinaryInstruction>(dag.insn);
+ const Register dst = insn.getDst(0);
+ const Register src1 = insn.getSrc(childID ^ 1);
+ const SelectionDAG *src0DAG = dag.child[childID];
+ if (src0DAG != NULL) {
+ if (src0DAG->insn.getOpcode() == OP_LOADI) {
+ const auto &loadimm = cast<LoadImmInstruction>(src0DAG->insn);
+ const Immediate imm = loadimm.getImmediate();
+ const Type type = imm.type;
+ GBE_ASSERT(type == TYPE_U32 || type == TYPE_S32);
+ if (type == TYPE_U32 && imm.data.u32 <= 0xffff) {
+ sel.MUL(sel.selReg(dst, type),
+ sel.selReg(src1, type),
+ GenRegister::immuw(imm.data.u32));
+ if (dag.child[childID ^ 1] != NULL)
+ dag.child[childID ^ 1]->isRoot = 1;
+ return true;
+ }
+ if (type == TYPE_S32 && (imm.data.s32 >= -32768 && imm.data.s32 <= 32767)) {
+ sel.MUL(sel.selReg(dst, type),
+ sel.selReg(src1, type),
+ GenRegister::immw(imm.data.s32));
+ if (dag.child[childID ^ 1] != NULL)
+ dag.child[childID ^ 1]->isRoot = 1;
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ /*! Try to emit a multiply with a 16-bit special register */
+ bool emitMulSpecialReg(Selection::Opaque &sel, SelectionDAG &dag, uint32_t childID) const {
+ using namespace ir;
+ const BinaryInstruction &insn = cast<ir::BinaryInstruction>(dag.insn);
+ const Type type = insn.getType();
+ const Register dst = insn.getDst(0);
+ const Register src0 = insn.getSrc(childID);
+ const Register src1 = insn.getSrc(childID ^ 1);
+ if (is16BitSpecialReg(src0)) {
+ sel.MUL(sel.selReg(dst, type),
+ sel.selReg(src1, type),
+ sel.selReg(src0, TYPE_U32));
+ markAllChildren(dag);
+ return true;
+ }
+ return false;
+ }
+
+ virtual bool emit(Selection::Opaque &sel, SelectionDAG &dag) const
+ {
+ using namespace ir;
+ const BinaryInstruction &insn = cast<ir::BinaryInstruction>(dag.insn);
+ const Type type = insn.getType();
+ if (type == TYPE_U32 || type == TYPE_S32) {
+ if (this->emitMulSpecialReg(sel, dag, 0))
+ return true;
+ if (this->emitMulSpecialReg(sel, dag, 1))
+ return true;
+ if (this->emitMulImmediate(sel, dag, 0))
+ return true;
+ if (this->emitMulImmediate(sel, dag, 1))
+ return true;
+ }
+ return false;
+ }
+ };
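+
+ // Sketch: this catches cases such as a multiply by an immediate that fits
+ // in 16 bits, or by one of the local id / local size special registers
+ // (treated as 16-bit here), so a single MUL suffices instead of the
+ // mul/mach/mov sequence of the 32x32 pattern above.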
+
+#define DECL_NOT_IMPLEMENTED_ONE_TO_MANY(FAMILY) \
+ struct FAMILY##Pattern : public OneToManyPattern<FAMILY##Pattern, ir::FAMILY>\
+ {\
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::FAMILY &insn) const {\
+ NOT_IMPLEMENTED;\
+ return false;\
+ }\
+ DECL_CTOR(FAMILY, 1, 1); \
+ }
+ DECL_NOT_IMPLEMENTED_ONE_TO_MANY(SampleInstruction);
+ DECL_NOT_IMPLEMENTED_ONE_TO_MANY(TypedWriteInstruction);
+#undef DECL_NOT_IMPLEMENTED_ONE_TO_MANY
+
+ /*! Load immediate pattern */
+ DECL_PATTERN(LoadImmInstruction)
+ {
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::LoadImmInstruction &insn) const
+ {
+ using namespace ir;
+ const Type type = insn.getType();
+ const Immediate imm = insn.getImmediate();
+ const GenRegister dst = sel.selReg(insn.getDst(0), type);
+
+ switch (type) {
+ case TYPE_U32:
+ case TYPE_S32:
+ case TYPE_FLOAT:
+ sel.MOV(GenRegister::retype(dst, GEN_TYPE_F),
+ GenRegister::immf(imm.data.f32));
+ break;
+ case TYPE_U16: sel.MOV(dst, GenRegister::immuw(imm.data.u16)); break;
+ case TYPE_S16: sel.MOV(dst, GenRegister::immw(imm.data.s16)); break;
+ case TYPE_U8: sel.MOV(dst, GenRegister::immuw(imm.data.u8)); break;
+ case TYPE_S8: sel.MOV(dst, GenRegister::immw(imm.data.s8)); break;
+ default: NOT_SUPPORTED;
+ }
+ return true;
+ }
+
+ DECL_CTOR(LoadImmInstruction, 1,1);
+ };
+
+ /*! Sync instruction */
+ DECL_PATTERN(SyncInstruction)
+ {
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::SyncInstruction &insn) const
+ {
+ using namespace ir;
+ const uint32_t params = insn.getParameters();
+ GBE_ASSERTM(params == syncLocalBarrier,
+ "Only barrier(CLK_LOCAL_MEM_FENCE) is supported right now "
+ "for the synchronization primitives");
+ const ir::Register reg = sel.reg(FAMILY_DWORD);
+
+ sel.push();
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.execWidth = 8;
+ sel.curr.physicalFlag = 0;
+ sel.curr.noMask = 1;
+ sel.SHL(GenRegister::ud8grf(reg),
+ GenRegister::ud1grf(ocl::threadn),
+ GenRegister::immud(0x9));
+ sel.OR(GenRegister::ud8grf(reg),
+ GenRegister::ud8grf(reg),
+ GenRegister::immud(0x00088000));
+ // A single barrier message is enough to start the thread synchronization *and* the SLM fence
+ sel.BARRIER(GenRegister::f8grf(reg));
+ // Now we wait for the other threads
+ sel.curr.execWidth = 1;
+ sel.WAIT();
+ sel.pop();
+ return true;
+ }
+
+ DECL_CTOR(SyncInstruction, 1,1);
+ };
+
+ INLINE uint32_t getByteScatterGatherSize(ir::Type type) {
+ using namespace ir;
+ switch (type) {
+ case TYPE_FLOAT:
+ case TYPE_U32:
+ case TYPE_S32:
+ return GEN_BYTE_SCATTER_DWORD;
+ case TYPE_U16:
+ case TYPE_S16:
+ return GEN_BYTE_SCATTER_WORD;
+ case TYPE_U8:
+ case TYPE_S8:
+ return GEN_BYTE_SCATTER_BYTE;
+ default: NOT_SUPPORTED;
+ return GEN_BYTE_SCATTER_BYTE;
+ }
+ }
+
+ /*! Load instruction pattern */
+ DECL_PATTERN(LoadInstruction)
+ {
+ void emitUntypedRead(Selection::Opaque &sel,
+ const ir::LoadInstruction &insn,
+ GenRegister addr,
+ uint32_t bti) const
+ {
+ using namespace ir;
+ const uint32_t valueNum = insn.getValueNum();
+ GenRegister dst[valueNum];
+ for (uint32_t dstID = 0; dstID < valueNum; ++dstID)
+ dst[dstID] = GenRegister::retype(sel.selReg(insn.getValue(dstID)), GEN_TYPE_F);
+ sel.UNTYPED_READ(addr, dst, valueNum, bti);
+ }
+
+ void emitByteGather(Selection::Opaque &sel,
+ const ir::LoadInstruction &insn,
+ GenRegister address,
+ GenRegister value,
+ uint32_t bti) const
+ {
+ using namespace ir;
+ GBE_ASSERT(insn.getValueNum() == 1);
+ const Type type = insn.getValueType();
+ const uint32_t elemSize = getByteScatterGatherSize(type);
+ const uint32_t simdWidth = sel.ctx.getSimdWidth();
+
+ // We need a temporary register if we read bytes or words
+ Register dst = Register(value.value.reg);
+ if (elemSize == GEN_BYTE_SCATTER_WORD ||
+ elemSize == GEN_BYTE_SCATTER_BYTE)
+ dst = sel.reg(FAMILY_DWORD);
+
+ // The gather always reads into dword lanes: either directly into the
+ // destination (dword loads) or into the temporary that we repack below
+ sel.BYTE_GATHER(GenRegister::fxgrf(simdWidth, dst), address, elemSize, bti);
+
+ // Repack bytes or words using a converting mov instruction
+ if (elemSize == GEN_BYTE_SCATTER_WORD)
+ sel.MOV(GenRegister::retype(value, GEN_TYPE_UW), GenRegister::unpacked_uw(dst));
+ else if (elemSize == GEN_BYTE_SCATTER_BYTE)
+ sel.MOV(GenRegister::retype(value, GEN_TYPE_UB), GenRegister::unpacked_ub(dst));
+ }
+
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::LoadInstruction &insn) const {
+ using namespace ir;
+ const GenRegister address = sel.selReg(insn.getAddress());
+ const AddressSpace space = insn.getAddressSpace();
+ GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL ||
+ insn.getAddressSpace() == MEM_PRIVATE ||
+ insn.getAddressSpace() == MEM_LOCAL);
+ GBE_ASSERT(sel.ctx.isScalarReg(insn.getValue(0)) == false);
+ if (insn.isAligned() == true)
+ this->emitUntypedRead(sel, insn, address, space == MEM_LOCAL ? 0xfe : 0x00);
+ else {
+ const GenRegister value = sel.selReg(insn.getValue(0));
+ this->emitByteGather(sel, insn, address, value, space == MEM_LOCAL ? 0xfe : 0x01);
+ }
+ return true;
+ }
+ DECL_CTOR(LoadInstruction, 1, 1);
+ };
+
+ /*! Store instruction pattern */
+ DECL_PATTERN(StoreInstruction)
+ {
+ void emitUntypedWrite(Selection::Opaque &sel,
+ const ir::StoreInstruction &insn,
+ uint32_t bti) const
+ {
+ using namespace ir;
+ const uint32_t valueNum = insn.getValueNum();
+ const uint32_t addrID = ir::StoreInstruction::addressIndex;
+ GenRegister addr, value[valueNum];
+
+ addr = GenRegister::retype(sel.selReg(insn.getSrc(addrID)), GEN_TYPE_F);
+ for (uint32_t valueID = 0; valueID < valueNum; ++valueID)
+ value[valueID] = GenRegister::retype(sel.selReg(insn.getValue(valueID)), GEN_TYPE_F);
+ sel.UNTYPED_WRITE(addr, value, valueNum, bti);
+ }
+
+ void emitByteScatter(Selection::Opaque &sel,
+ const ir::StoreInstruction &insn,
+ GenRegister addr,
+ GenRegister value,
+ uint32_t bti) const
+ {
+ using namespace ir;
+ const Type type = insn.getValueType();
+ const uint32_t elemSize = getByteScatterGatherSize(type);
+ const uint32_t simdWidth = sel.ctx.getSimdWidth();
+ const GenRegister dst = value;
+
+ GBE_ASSERT(insn.getValueNum() == 1);
+ if (elemSize == GEN_BYTE_SCATTER_WORD) {
+ value = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
+ sel.MOV(value, GenRegister::retype(dst, GEN_TYPE_UW));
+ } else if (elemSize == GEN_BYTE_SCATTER_BYTE) {
+ value = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
+ sel.MOV(value, GenRegister::retype(dst, GEN_TYPE_UB));
+ }
+ sel.BYTE_SCATTER(addr, value, elemSize, bti);
+ }
+
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::StoreInstruction &insn) const
+ {
+ using namespace ir;
+ const AddressSpace space = insn.getAddressSpace();
+ const uint32_t bti = space == MEM_LOCAL ? 0xfe : 0x01;
+ if (insn.isAligned() == true)
+ this->emitUntypedWrite(sel, insn, bti);
+ else {
+ const GenRegister address = sel.selReg(insn.getAddress());
+ const GenRegister value = sel.selReg(insn.getValue(0));
+ this->emitByteScatter(sel, insn, address, value, bti);
+ }
+ return true;
+ }
+ DECL_CTOR(StoreInstruction, 1, 1);
+ };
+
+ /*! Compare instruction pattern */
+ class CompareInstructionPattern : public SelectionPattern
+ {
+ public:
+ CompareInstructionPattern(void) : SelectionPattern(1,1) {
+ for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
+ if (ir::isOpcodeFrom<ir::CompareInstruction>(ir::Opcode(op)) == true)
+ this->opcodes.push_back(ir::Opcode(op));
+ }
+
+ INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const
+ {
+ using namespace ir;
+ const ir::CompareInstruction &insn = cast<CompareInstruction>(dag.insn);
+ const Opcode opcode = insn.getOpcode();
+ const Type type = insn.getType();
+ const uint32_t genCmp = getGenCompare(opcode);
+ const Register dst = insn.getDst(0);
+
+ // Limit the compare to the active lanes. Use the same compare as for f0.0
+ sel.push();
+ const LabelIndex label = insn.getParent()->getLabelIndex();
+ const GenRegister blockip = sel.selReg(ocl::blockip, TYPE_U16);
+ const GenRegister labelReg = GenRegister::immuw(label);
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.physicalFlag = 0;
+ sel.curr.flagIndex = uint16_t(dst);
+ sel.CMP(GEN_CONDITIONAL_LE, blockip, labelReg);
+ sel.pop();
+
+ // Look for immediate values for the right source
+ GenRegister src0, src1;
+ SelectionDAG *dag0 = dag.child[0];
+ SelectionDAG *dag1 = dag.child[1];
+
+ // Right source can always be an immediate
+ if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI) {
+ const auto &childInsn = cast<LoadImmInstruction>(dag1->insn);
+ src0 = sel.selReg(insn.getSrc(0), type);
+ src1 = getRegisterFromImmediate(childInsn.getImmediate());
+ if (dag0) dag0->isRoot = 1;
+ } else {
+ src0 = sel.selReg(insn.getSrc(0), type);
+ src1 = sel.selReg(insn.getSrc(1), type);
+ markAllChildren(dag);
+ }
+
+ sel.push();
+ sel.curr.physicalFlag = 0;
+ sel.curr.flagIndex = uint16_t(dst);
+ sel.CMP(genCmp, src0, src1);
+ sel.pop();
+ return true;
+ }
+ };
+
+ /*! Convert instruction pattern */
+ DECL_PATTERN(ConvertInstruction)
+ {
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::ConvertInstruction &insn) const
+ {
+ using namespace ir;
+ const Type dstType = insn.getDstType();
+ const Type srcType = insn.getSrcType();
+ const RegisterFamily dstFamily = getFamily(dstType);
+ const RegisterFamily srcFamily = getFamily(srcType);
+ const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
+ const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
+
+ // We need two instructions to make the conversion
+ if (dstFamily != FAMILY_DWORD && srcFamily == FAMILY_DWORD) {
+ GenRegister unpacked;
+ if (dstFamily == FAMILY_WORD) {
+ const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
+ unpacked = GenRegister::unpacked_uw(sel.reg(FAMILY_DWORD));
+ unpacked = GenRegister::retype(unpacked, type);
+ } else {
+ const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : GEN_TYPE_B;
+ unpacked = GenRegister::unpacked_ub(sel.reg(FAMILY_DWORD));
+ unpacked = GenRegister::retype(unpacked, type);
+ }
+ sel.MOV(unpacked, src);
+ sel.MOV(dst, unpacked);
+ } else
+ sel.MOV(dst, src);
+ return true;
+ }
+ DECL_CTOR(ConvertInstruction, 1, 1);
+ };
+
+ /*! Select instruction pattern */
+ class SelectInstructionPattern : public SelectionPattern
+ {
+ public:
+ SelectInstructionPattern(void) : SelectionPattern(1,1) {
+ for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
+ if (ir::isOpcodeFrom<ir::SelectInstruction>(ir::Opcode(op)) == true)
+ this->opcodes.push_back(ir::Opcode(op));
+ }
+
+ INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const
+ {
+ using namespace ir;
+ const ir::SelectInstruction &insn = cast<SelectInstruction>(dag.insn);
+
+ // Get all registers for the instruction
+ const Type type = insn.getType();
+ const GenRegister dst = sel.selReg(insn.getDst(0), type);
+
+ // Look for immediate values for the right source
+ GenRegister src0, src1;
+ SelectionDAG *dag0 = dag.child[0]; // source 0 is the predicate!
+ SelectionDAG *dag1 = dag.child[1];
+ SelectionDAG *dag2 = dag.child[2];
+
+ // Right source can always be an immediate
+ if (OCL_OPTIMIZE_IMMEDIATE && dag2 != NULL && dag2->insn.getOpcode() == OP_LOADI) {
+ const auto &childInsn = cast<LoadImmInstruction>(dag2->insn);
+ src0 = sel.selReg(insn.getSrc(SelectInstruction::src0Index), type);
+ src1 = getRegisterFromImmediate(childInsn.getImmediate());
+ if (dag0) dag0->isRoot = 1;
+ if (dag1) dag1->isRoot = 1;
+ } else {
+ src0 = sel.selReg(insn.getSrc(SelectInstruction::src0Index), type);
+ src1 = sel.selReg(insn.getSrc(SelectInstruction::src1Index), type);
+ markAllChildren(dag);
+ }
+
+ // Since we cannot predicate the select instruction with our current mask,
+ // we need to perform the selection in two steps (one to select, one to
+ // update the destination register)
+ const RegisterFamily family = getFamily(type);
+ const GenRegister tmp = sel.selReg(sel.reg(family), type);
+ const uint32_t simdWidth = sel.ctx.getSimdWidth();
+ const Register pred = insn.getPredicate();
+ sel.push();
+ sel.curr.predicate = GEN_PREDICATE_NORMAL;
+ sel.curr.execWidth = simdWidth;
+ sel.curr.physicalFlag = 0;
+ sel.curr.flagIndex = uint16_t(pred);
+ sel.curr.noMask = 0;
+ sel.SEL(tmp, src0, src1);
+ sel.pop();
+
+ // Update the destination register properly now
+ sel.MOV(dst, tmp);
+ return true;
+ }
+ };
+
+ /*! Label instruction pattern */
+ DECL_PATTERN(LabelInstruction)
+ {
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::LabelInstruction &insn) const
+ {
+ using namespace ir;
+ const LabelIndex label = insn.getLabelIndex();
+ const GenRegister src0 = sel.selReg(ocl::blockip);
+ const GenRegister src1 = GenRegister::immuw(label);
+ const uint32_t simdWidth = sel.ctx.getSimdWidth();
+ sel.LABEL(label);
+
+ // Do not emit any code for the "returning" block. There is no need for it
+ if (insn.getParent() == &sel.ctx.getFunction().getBottomBlock())
+ return true;
+
+ // Emit the mask computation at the head of each basic block
+ sel.push();
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 0;
+ sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1);
+ sel.pop();
+
+ // If it is required, insert a JUMP to bypass the block
+ if (sel.ctx.hasJIP(&insn)) {
+ const LabelIndex jip = sel.ctx.getLabelIndex(&insn);
+ sel.push();
+ if (simdWidth == 8)
+ sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H;
+ else if (simdWidth == 16)
+ sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H;
+ else
+ NOT_IMPLEMENTED;
+ sel.curr.inversePredicate = 1;
+ sel.curr.execWidth = 1;
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 0;
+ sel.curr.noMask = 1;
+ sel.JMPI(GenRegister::immd(0), jip);
+ sel.pop();
+ }
+ return true;
+ }
+ DECL_CTOR(LabelInstruction, 1, 1);
+ };
+
+ /*! Branch instruction pattern */
+ DECL_PATTERN(BranchInstruction)
+ {
+ void emitForwardBranch(Selection::Opaque &sel,
+ const ir::BranchInstruction &insn,
+ ir::LabelIndex dst,
+ ir::LabelIndex src) const
+ {
+ using namespace ir;
+ const GenRegister ip = sel.selReg(ocl::blockip, TYPE_U16);
+ const LabelIndex jip = sel.ctx.getLabelIndex(&insn);
+ const uint32_t simdWidth = sel.ctx.getSimdWidth();
+
+ // We will not emit any jump if we must fall through to the next block anyway
+ const BasicBlock *curr = insn.getParent();
+ const BasicBlock *next = curr->getNextBlock();
+ const LabelIndex nextLabel = next->getLabelIndex();
+
+ if (insn.isPredicated() == true) {
+ const Register pred = insn.getPredicateIndex();
+
+ // Update the PcIPs
+ sel.push();
+ sel.curr.physicalFlag = 0;
+ sel.curr.flagIndex = uint16_t(pred);
+ sel.MOV(ip, GenRegister::immuw(uint16_t(dst)));
+ sel.pop();
+
+ if (nextLabel == jip) return;
+
+ // It is slightly more complicated than for a backward jump. We check that
+ // all PcIPs are greater than the next block's IP to be sure that we can
+ // jump
+ sel.push();
+ sel.curr.physicalFlag = 0;
+ sel.curr.flagIndex = uint16_t(pred);
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.CMP(GEN_CONDITIONAL_G, ip, GenRegister::immuw(nextLabel));
+
+ // Branch to the jump target
+ if (simdWidth == 8)
+ sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
+ else if (simdWidth == 16)
+ sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL16H;
+ else
+ NOT_SUPPORTED;
+ sel.curr.execWidth = 1;
+ sel.curr.noMask = 1;
+ sel.JMPI(GenRegister::immd(0), jip);
+ sel.pop();
+
+ } else {
+ // Update the PcIPs
+ sel.MOV(ip, GenRegister::immuw(uint16_t(dst)));
+
+ // Do not emit branch when we go to the next block anyway
+ if (nextLabel == jip) return;
+ sel.push();
+ sel.curr.execWidth = 1;
+ sel.curr.noMask = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.JMPI(GenRegister::immd(0), jip);
+ sel.pop();
+ }
+ }
+
+ void emitBackwardBranch(Selection::Opaque &sel,
+ const ir::BranchInstruction &insn,
+ ir::LabelIndex dst,
+ ir::LabelIndex src) const
+ {
+ using namespace ir;
+ const GenRegister ip = sel.selReg(ocl::blockip, TYPE_U16);
+ const Function &fn = sel.ctx.getFunction();
+ const BasicBlock &bb = fn.getBlock(src);
+ const LabelIndex jip = sel.ctx.getLabelIndex(&insn);
+ const uint32_t simdWidth = sel.ctx.getSimdWidth();
+ GBE_ASSERT(bb.getNextBlock() != NULL);
+
+ if (insn.isPredicated() == true) {
+ const Register pred = insn.getPredicateIndex();
+
+ // Update the PcIPs for all the lanes: just put the IP of the next
+ // block. The next instruction will then properly re-update the IPs of
+ // the lanes that actually take the branch
+ const LabelIndex next = bb.getNextBlock()->getLabelIndex();
+ sel.MOV(ip, GenRegister::immuw(uint16_t(next)));
+
+ sel.push();
+ // Re-update the PcIPs for the lanes that take the backward jump
+ sel.curr.physicalFlag = 0;
+ sel.curr.flagIndex = uint16_t(pred);
+ sel.MOV(ip, GenRegister::immuw(uint16_t(dst)));
+
+ // Branch to the jump target
+ if (simdWidth == 8)
+ sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H;
+ else if (simdWidth == 16)
+ sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H;
+ else
+ NOT_SUPPORTED;
+ sel.curr.execWidth = 1;
+ sel.curr.noMask = 1;
+ sel.JMPI(GenRegister::immd(0), jip);
+ sel.pop();
+
+ } else {
+
+ // Update the PcIPs
+ sel.MOV(ip, GenRegister::immuw(uint16_t(dst)));
+
+ // Branch to the jump target
+ sel.push();
+ sel.curr.execWidth = 1;
+ sel.curr.noMask = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.JMPI(GenRegister::immd(0), jip);
+ sel.pop();
+ }
+ }
+
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::BranchInstruction &insn) const {
+ using namespace ir;
+ const Opcode opcode = insn.getOpcode();
+ if (opcode == OP_RET)
+ sel.EOT();
+ else if (opcode == OP_BRA) {
+ const LabelIndex dst = insn.getLabelIndex();
+ const LabelIndex src = insn.getParent()->getLabelIndex();
+
+ // We handle forward and backward branches differently
+ if (uint32_t(dst) <= uint32_t(src))
+ this->emitBackwardBranch(sel, insn, dst, src);
+ else
+ this->emitForwardBranch(sel, insn, dst, src);
+ } else
+ NOT_IMPLEMENTED;
+ return true;
+ }
+
+ DECL_CTOR(BranchInstruction, 1, 1);
+ };
+
+ /*! Sort patterns */
+ INLINE bool cmp(const SelectionPattern *p0, const SelectionPattern *p1) {
+ if (p0->insnNum != p1->insnNum)
+ return p0->insnNum > p1->insnNum;
+ return p0->cost < p1->cost;
+ }
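+
+ // With this ordering, e.g. the MulAdd pattern (which matches two IR
+ // instructions) is tried before the generic binary pattern, and among
+ // patterns matching the same number of instructions the cheapest one wins.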
+
+ SelectionLibrary::SelectionLibrary(void) {
+ this->insert<UnaryInstructionPattern>();
+ this->insert<BinaryInstructionPattern>();
+ this->insert<SampleInstructionPattern>();
+ this->insert<TypedWriteInstructionPattern>();
+ this->insert<SyncInstructionPattern>();
+ this->insert<LoadImmInstructionPattern>();
+ this->insert<LoadInstructionPattern>();
+ this->insert<StoreInstructionPattern>();
+ this->insert<SelectInstructionPattern>();
+ this->insert<CompareInstructionPattern>();
+ this->insert<ConvertInstructionPattern>();
+ this->insert<LabelInstructionPattern>();
+ this->insert<BranchInstructionPattern>();
+ this->insert<Int32x32MulInstructionPattern>();
+ this->insert<Int32x16MulInstructionPattern>();
+ this->insert<MulAddInstructionPattern>();
+ this->insert<SelectModifierInstructionPattern>();
+
+ // Sort all the patterns by the number of instructions they match (largest first), then by cost
+ for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
+ std::sort(this->patterns[op].begin(), this->patterns[op].end(), cmp);
+ }
+
+ SelectionLibrary::~SelectionLibrary(void) {
+ for (auto pattern : this->toFree)
+ GBE_DELETE(const_cast<SelectionPattern*>(pattern));
+ }
+
+ template <typename PatternType>
+ void SelectionLibrary::insert(void) {
+ const SelectionPattern *pattern = GBE_NEW_NO_ARG(PatternType);
+ this->toFree.push_back(pattern);
+ for (auto opcode : pattern->opcodes)
+ this->patterns[opcode].push_back(pattern);
+ }
+
+} /* namespace gbe */
+
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
new file mode 100644
index 0000000..49a7954
--- /dev/null
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -0,0 +1,215 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file gen_insn_selection.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __GEN_INSN_SELECTION_HPP__
+#define __GEN_INSN_SELECTION_HPP__
+
+#include "ir/register.hpp"
+#include "ir/instruction.hpp"
+#include "backend/gen_register.hpp"
+#include "backend/gen_encoder.hpp"
+#include "backend/gen_context.hpp"
+#include "sys/vector.hpp"
+#include "sys/intrusive_list.hpp"
+
+namespace gbe
+{
+ /*! Translate IR type to Gen type */
+ uint32_t getGenType(ir::Type type);
+
+ /*! Translate IR compare to Gen compare */
+ uint32_t getGenCompare(ir::Opcode opcode);
+
+ /*! Selection opcodes properly encoded from 0 to n for fast jump table
+ * generation
+ */
+ enum SelectionOpcode {
+#define DECL_SELECTION_IR(OP, FN) SEL_OP_##OP,
+#include "backend/gen_insn_selection.hxx"
+#undef DECL_SELECTION_IR
+ };
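+
+ // The enum above is generated from gen_insn_selection.hxx, so its first
+ // entries expand to SEL_OP_LABEL, SEL_OP_MOV, SEL_OP_NOT, ... in the order
+ // the opcodes are declared in that file.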
+
+ // Owns and allocates selection instructions
+ class Selection;
+
+ // A list of SelectionInstructions forms a block
+ class SelectionBlock;
+
+ /*! A selection instruction is almost a Gen instruction, but *before*
+ * register allocation
+ */
+ class SelectionInstruction : public NonCopyable, public intrusive_list_node
+ {
+ public:
+ /*! Owns the instruction */
+ SelectionBlock *parent;
+ /*! Insert an instruction before this one */
+ void prepend(SelectionInstruction &insn);
+ /*! Append an instruction after this one */
+ void append(SelectionInstruction &insn);
+ /*! Does it read memory? */
+ bool isRead(void) const;
+ /*! Does it write memory? */
+ bool isWrite(void) const;
+ /*! Is it a branch instruction (i.e. does it modify control flow)? */
+ bool isBranch(void) const;
+ /*! Is it a label instruction (i.e. does it change the implicit mask)? */
+ bool isLabel(void) const;
+ /*! Get the destination register */
+ GenRegister &dst(uint32_t dstID) { return regs[dstID]; }
+ /*! Get the source register */
+ GenRegister &src(uint32_t srcID) { return regs[dstNum+srcID]; }
+ /*! Damn C++ */
+ const GenRegister &dst(uint32_t dstID) const { return regs[dstID]; }
+ /*! Damn C++ */
+ const GenRegister &src(uint32_t srcID) const { return regs[dstNum+srcID]; }
+ /*! No more than 8 sources (used by typed writes) */
+ enum { MAX_SRC_NUM = 8 };
+ /*! No more than 4 destinations (used by samples and untyped reads) */
+ enum { MAX_DST_NUM = 4 };
+ /*! State of the instruction (extra fields needed for the encoding) */
+ GenInstructionState state;
+ union {
+ struct {
+ /*! Store bti for loads/stores and function for math and compares */
+ uint16_t function:8;
+ /*! elemSize for byte scatters / gathers, elemNum for untyped msg */
+ uint16_t elem:8;
+ };
+ struct {
+ /*! Number of sources in the tuple */
+ uint16_t width:4;
+ /*! vertical stride (0,1,2,4,8 or 16) */
+ uint16_t vstride:5;
+ /*! horizontal stride (0,1,2,4,8 or 16) */
+ uint16_t hstride:5;
+ /*! offset (0 to 7) */
+ uint16_t offset:5;
+ };
+ } extra;
+ /*! Gen opcode */
+ uint8_t opcode;
+ /*! Number of destinations */
+ uint8_t dstNum:4;
+ /*! Number of sources */
+ uint8_t srcNum:4;
+ /*! To store various indices */
+ uint16_t index;
+ /*! Variable sized. Destinations and sources go here */
+ GenRegister regs[];
+ private:
+ /*! Just Selection class can create SelectionInstruction */
+ SelectionInstruction(SelectionOpcode, uint32_t dstNum, uint32_t srcNum);
+ // Allocates (with a linear allocator) and owns SelectionInstruction
+ friend class Selection;
+ };
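+
+ /* Layout note: the trailing regs[] array stores the dstNum destination
+ * registers first and then the srcNum source registers, which is why src()
+ * above indexes regs[dstNum + srcID].
+ */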
+
+ /*! Instructions like sends require their registers to be contiguous in the GRF */
+ class SelectionVector : public NonCopyable, public intrusive_list_node
+ {
+ public:
+ SelectionVector(void);
+ /*! The instruction that requires the vector of registers */
+ SelectionInstruction *insn;
+ /*! Directly points to the selection instruction registers */
+ GenRegister *reg;
+ /*! Number of registers in the vector */
+ uint16_t regNum;
+ /*! Indicates whether this is a destination or a source vector */
+ uint16_t isSrc;
+ };
+
+ // Owns the selection block
+ class Selection;
+
+ /*! A selection block is the counterpart of the IR Basic block. It contains
+ * the instructions generated from an IR basic block
+ */
+ class SelectionBlock : public NonCopyable, public intrusive_list_node
+ {
+ public:
+ SelectionBlock(const ir::BasicBlock *bb);
+ /*! All the emitted instructions in the block */
+ intrusive_list<SelectionInstruction> insnList;
+ /*! The vectors that may be required by some instructions of the block */
+ intrusive_list<SelectionVector> vectorList;
+ /*! Extra registers needed by the block (only live in the block) */
+ gbe::vector<ir::Register> tmp;
+ /*! Associated IR basic block */
+ const ir::BasicBlock *bb;
+ /*! Append a new temporary register */
+ void append(ir::Register reg);
+ /*! Append a new selection vector in the block */
+ void append(SelectionVector *vec);
+ /*! Append a new selection instruction at the end of the block */
+ void append(SelectionInstruction *insn);
+ /*! Append a new selection instruction at the beginning of the block */
+ void prepend(SelectionInstruction *insn);
+ };
+
+ /*! Owns the selection engine */
+ class GenContext;
+
+ /*! Selection engine produces the pre-ISA instruction blocks */
+ class Selection
+ {
+ public:
+ /*! Initialize internal structures used for the selection */
+ Selection(GenContext &ctx);
+ /*! Release everything */
+ ~Selection(void);
+ /*! Implements the instruction selection itself */
+ void select(void);
+ /*! Bool and scalar registers use scalar physical registers */
+ bool isScalarOrBool(ir::Register reg) const;
+ /*! Get the number of instructions of the largest block */
+ uint32_t getLargestBlockSize(void) const;
+ /*! Number of register vectors in the selection */
+ uint32_t getVectorNum(void) const;
+ /*! Number of registers (temporaries are created during selection) */
+ uint32_t getRegNum(void) const;
+ /*! Get the family for the given register */
+ ir::RegisterFamily getRegisterFamily(ir::Register reg) const;
+ /*! Get the data for the given register */
+ ir::RegisterData getRegisterData(ir::Register reg) const;
+ /*! Replace a source by the returned temporary register */
+ ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID);
+ /*! Replace a destination by the returned temporary register */
+ ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID);
+ /*! Create a new selection instruction */
+ SelectionInstruction *create(SelectionOpcode, uint32_t dstNum, uint32_t srcNum);
+ /*! List of emitted blocks */
+ intrusive_list<SelectionBlock> *blockList;
+ /*! Actual implementation of the instruction selection (uses PImpl) */
+ class Opaque;
+ /*! Created and destroyed in cpp */
+ Opaque *opaque;
+ /*! Use custom allocator */
+ GBE_CLASS(Selection);
+ };
+
+} /* namespace gbe */
+
+#endif /* __GEN_INSN_SELECTION_HPP__ */
+
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
new file mode 100644
index 0000000..2d14e21
--- /dev/null
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -0,0 +1,35 @@
+DECL_SELECTION_IR(LABEL, LabelInstruction)
+DECL_SELECTION_IR(MOV, UnaryInstruction)
+DECL_SELECTION_IR(NOT, UnaryInstruction)
+DECL_SELECTION_IR(LZD, UnaryInstruction)
+DECL_SELECTION_IR(RNDZ, UnaryInstruction)
+DECL_SELECTION_IR(RNDE, UnaryInstruction)
+DECL_SELECTION_IR(RNDD, UnaryInstruction)
+DECL_SELECTION_IR(RNDU, UnaryInstruction)
+DECL_SELECTION_IR(FRC, UnaryInstruction)
+DECL_SELECTION_IR(SEL, BinaryInstruction)
+DECL_SELECTION_IR(AND, BinaryInstruction)
+DECL_SELECTION_IR(OR, BinaryInstruction)
+DECL_SELECTION_IR(XOR, BinaryInstruction)
+DECL_SELECTION_IR(SHR, BinaryInstruction)
+DECL_SELECTION_IR(SHL, BinaryInstruction)
+DECL_SELECTION_IR(RSR, BinaryInstruction)
+DECL_SELECTION_IR(RSL, BinaryInstruction)
+DECL_SELECTION_IR(ASR, BinaryInstruction)
+DECL_SELECTION_IR(ADD, BinaryInstruction)
+DECL_SELECTION_IR(MUL, BinaryInstruction)
+DECL_SELECTION_IR(MACH, BinaryInstruction)
+DECL_SELECTION_IR(CMP, CompareInstruction)
+DECL_SELECTION_IR(SEL_CMP, CompareInstruction)
+DECL_SELECTION_IR(MAD, TernaryInstruction)
+DECL_SELECTION_IR(JMPI, JumpInstruction)
+DECL_SELECTION_IR(EOT, EotInstruction)
+DECL_SELECTION_IR(NOP, NoOpInstruction)
+DECL_SELECTION_IR(WAIT, WaitInstruction)
+DECL_SELECTION_IR(MATH, MathInstruction)
+DECL_SELECTION_IR(BARRIER, BarrierInstruction)
+DECL_SELECTION_IR(UNTYPED_READ, UntypedReadInstruction)
+DECL_SELECTION_IR(UNTYPED_WRITE, UntypedWriteInstruction)
+DECL_SELECTION_IR(BYTE_GATHER, ByteGatherInstruction)
+DECL_SELECTION_IR(BYTE_SCATTER, ByteScatterInstruction)
+
diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp
new file mode 100644
index 0000000..8a7efdb
--- /dev/null
+++ b/backend/src/backend/gen_program.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file program.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "backend/program.h"
+#include "backend/gen_program.h"
+#include "backend/gen_program.hpp"
+#include "backend/gen_context.hpp"
+#include "backend/gen_defs.hpp"
+#include "backend/gen_reg_allocation.hpp"
+#include "ir/unit.hpp"
+#include "llvm/llvm_to_gen.hpp"
+
+#include <cstring>
+#include <memory>
+
+namespace gbe {
+
+ GenKernel::GenKernel(const std::string &name) :
+ Kernel(name), insns(NULL), insnNum(0)
+ {}
+ GenKernel::~GenKernel(void) { GBE_SAFE_DELETE_ARRAY(insns); }
+ const char *GenKernel::getCode(void) const { return (const char*) insns; }
+ size_t GenKernel::getCodeSize(void) const { return insnNum * sizeof(GenInstruction); }
+
+ GenProgram::GenProgram(void) {}
+ GenProgram::~GenProgram(void) {}
+
+ /*! We must avoid spilling at all costs with Gen */
+ static const struct CodeGenStrategy {
+ uint32_t simdWidth;
+ bool limitRegisterPressure;
+ } codeGenStrategy[] = {
+ {16,false},
+ {16,true},
+ {8,false},
+ {8,true},
+ };
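+
+ // Reading the table above: the compiler first tries SIMD16 without limiting
+ // register pressure, then SIMD16 with the limit, and finally the same two
+ // SIMD8 variants, stopping at the first strategy that compiles without
+ // spilling.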
+
+ Kernel *GenProgram::compileKernel(const ir::Unit &unit, const std::string &name) {
+
+ // Be careful when the simdWidth is forced by the programmer: in that case
+ // the function already provides the SIMD width we must use (i.e. it is
+ // non-zero), so only the matching strategies are tried
+ const ir::Function *fn = unit.getFunction(name);
+ const uint32_t codeGenNum = fn->getSimdWidth() == 16 ? 2 : 4;
+ uint32_t codeGen = fn->getSimdWidth() == 8 ? 2 : 0;
+ Kernel *kernel = NULL;
+
+ // Stop when compilation is successful
+ for (; codeGen < codeGenNum; ++codeGen) {
+ const uint32_t simdWidth = codeGenStrategy[codeGen].simdWidth;
+ const bool limitRegisterPressure = codeGenStrategy[codeGen].limitRegisterPressure;
+
+ // Force the SIMD width now and try to compile
+ unit.getFunction(name)->setSimdWidth(simdWidth);
+ Context *ctx = GBE_NEW(GenContext, unit, name, limitRegisterPressure);
+ kernel = ctx->compileKernel();
+ GBE_DELETE(ctx);
+ if (kernel != NULL)
+ break;
+ }
+
+ // XXX spill must be implemented
+ GBE_ASSERTM(kernel != NULL, "Register spilling not supported yet!");
+ return kernel;
+ }
+
+ static gbe_program genProgramNewFromBinary(const char *binary, size_t size) {
+ NOT_IMPLEMENTED;
+ return NULL;
+ }
+
+ static gbe_program genProgramNewFromLLVM(const char *fileName,
+ size_t stringSize,
+ char *err,
+ size_t *errSize)
+ {
+ using namespace gbe;
+ GenProgram *program = GBE_NEW_NO_ARG(GenProgram);
+ std::string error;
+ // Try to compile the program
+ if (program->buildFromLLVMFile(fileName, error) == false) {
+ if (err != NULL && errSize != NULL && stringSize > 0u) {
+ const size_t msgSize = std::min(error.size(), stringSize-1u);
+ std::memcpy(err, error.c_str(), msgSize);
+ *errSize = error.size();
+ }
+ GBE_DELETE(program);
+ return NULL;
+ }
+ // Everything ran fine
+ return (gbe_program) program;
+ }
+} /* namespace gbe */
+
+void genSetupCallBacks(void)
+{
+ gbe_program_new_from_binary = gbe::genProgramNewFromBinary;
+ gbe_program_new_from_llvm = gbe::genProgramNewFromLLVM;
+}
+
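
For illustration, a minimal standalone sketch of the fallback strategy used by
GenProgram::compileKernel above: walk the (SIMD width, register pressure limit)
table and keep the first configuration that compiles. The Strategy and
tryCompile names are placeholders for this sketch only, not part of the backend.

    #include <cstdint>
    #include <functional>

    struct Strategy { uint32_t simdWidth; bool limitRegisterPressure; };

    static const Strategy strategies[] = {
      {16, false}, {16, true}, {8, false}, {8, true},
    };

    // Return the index of the first strategy that compiles, or -1 if none does.
    int pickStrategy(uint32_t forcedSimdWidth,
                     const std::function<bool(const Strategy&)> &tryCompile)
    {
      for (int i = 0; i < 4; ++i) {
        // When the programmer forces a SIMD width, only try matching entries.
        if (forcedSimdWidth != 0 && strategies[i].simdWidth != forcedSimdWidth)
          continue;
        if (tryCompile(strategies[i]))
          return i; // first successful configuration wins
      }
      return -1; // nothing compiled; the real backend asserts in that case
    }
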
diff --git a/backend/src/backend/gen_program.h b/backend/src/backend/gen_program.h
new file mode 100644
index 0000000..9fae2e7
--- /dev/null
+++ b/backend/src/backend/gen_program.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file gen_program.h
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ *
+ * C-like interface for the gen kernels and programs
+ */
+
+#ifndef __GBE_GEN_PROGRAM_H__
+#define __GBE_GEN_PROGRAM_H__
+
+#include <stdint.h>
+#include <stdlib.h>
+
+/*! This will make the compiler output Gen ISA code */
+extern void genSetupCallBacks(void);
+
+#endif /* __GBE_GEN_PROGRAM_H__ */
+
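
For illustration, a minimal sketch of the callback-registration pattern that
genSetupCallBacks() implements: the generic layer exposes function pointers and
the Gen backend fills them in once at initialization. All names below
(program_new_from_llvm_cb, gen_new_from_llvm, setup_gen_callbacks) are
placeholders for this sketch, not the real symbols.

    #include <cstddef>

    // Signature of the "compile from an LLVM file" entry point.
    typedef void *(*program_new_from_llvm_cb)(const char *fileName,
                                               size_t errStringSize,
                                               char *err, size_t *errSize);

    // Generic layer: null until a backend registers itself.
    static program_new_from_llvm_cb program_new_from_llvm_ptr = nullptr;

    // Backend implementation (stubbed out for the sketch).
    static void *gen_new_from_llvm(const char *, size_t, char *, size_t *) {
      return nullptr;
    }

    // Equivalent of genSetupCallBacks(): point the generic entry at the backend.
    void setup_gen_callbacks(void) {
      program_new_from_llvm_ptr = gen_new_from_llvm;
    }
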
diff --git a/backend/src/backend/gen_program.hpp b/backend/src/backend/gen_program.hpp
new file mode 100644
index 0000000..68b0427
--- /dev/null
+++ b/backend/src/backend/gen_program.hpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file gen_program.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __GBE_GEN_PROGRAM_HPP__
+#define __GBE_GEN_PROGRAM_HPP__
+
+#include "backend/program.h"
+#include "backend/program.hpp"
+
+// Gen ISA instruction
+struct GenInstruction;
+namespace gbe
+{
+ /*! Describe a compiled kernel */
+ class GenKernel : public Kernel
+ {
+ public:
+ /*! Create an empty kernel with the given name */
+ GenKernel(const std::string &name);
+ /*! Destroy it */
+ virtual ~GenKernel(void);
+ /*! Implements base class */
+ virtual const char *getCode(void) const;
+ /*! Implements base class */
+ virtual size_t getCodeSize(void) const;
+ GenInstruction *insns; //!< Instruction stream
+ uint32_t insnNum; //!< Number of instructions
+ GBE_CLASS(GenKernel); //!< Use custom allocators
+ };
+
+ /*! Describe a compiled program */
+ class GenProgram : public Program
+ {
+ public:
+ /*! Create an empty program */
+ GenProgram(void);
+ /*! Destroy the program */
+ virtual ~GenProgram(void);
+ /*! Implements base class */
+ virtual Kernel *compileKernel(const ir::Unit &unit, const std::string &name);
+ /*! Use custom allocators */
+ GBE_CLASS(GenProgram);
+ };
+
+} /* namespace gbe */
+
+#endif /* __GBE_GEN_PROGRAM_HPP__ */
+
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
new file mode 100644
index 0000000..b407c0f
--- /dev/null
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -0,0 +1,713 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file gen_reg_allocation.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#include "ir/profile.hpp"
+#include "ir/function.hpp"
+#include "backend/gen_insn_selection.hpp"
+#include "backend/gen_reg_allocation.hpp"
+#include "backend/gen_register.hpp"
+#include "backend/program.hpp"
+#include "sys/exception.hpp"
+#include <algorithm>
+#include <climits>
+
+namespace gbe
+{
+ /////////////////////////////////////////////////////////////////////////////
+ // Register allocator internal implementation
+ /////////////////////////////////////////////////////////////////////////////
+
+ /*! Provides the location of a register in a vector */
+ typedef std::pair<SelectionVector*, uint32_t> VectorLocation;
+
+ /*! Implements the register allocation */
+ class GenRegAllocator::Opaque
+ {
+ public:
+ /*! Initialize the register allocator */
+ Opaque(GenContext &ctx);
+ /*! Release all taken resources */
+ ~Opaque(void);
+ /*! Perform the register allocation. Return true if success */
+ bool allocate(Selection &selection);
+ /*! Return the Gen register from the selection register */
+ GenRegister genReg(const GenRegister ®);
+ private:
+ /*! Expire one GRF interval. Return true if one was successfully expired */
+ bool expireGRF(const GenRegInterval &limit);
+ /*! Expire a flag register. Return true if one was successfully expired */
+ bool expireFlag(const GenRegInterval &limit);
+ /*! Allocate the virtual boolean (== flags) registers */
+ void allocateFlags(Selection &selection);
+ /*! Allocate the GRF registers */
+ bool allocateGRFs(Selection &selection);
+ /*! Create a Gen register from a register set in the payload */
+ void allocatePayloadReg(gbe_curbe_type, ir::Register, uint32_t subValue = 0, uint32_t subOffset = 0);
+ /*! Create the intervals for each register */
+ /*! Allocate the vectors detected in the instruction selection pass */
+ void allocateVector(Selection &selection);
+ /*! Allocate the given interval. Return true if success */
+ bool createGenReg(const GenRegInterval &interval);
+ /*! Indicate if the registers are already allocated in vectors */
+ bool isAllocated(const SelectionVector *vector) const;
+ /*! Reallocate registers if needed to make the registers in the vector
+ * contiguous in memory
+ */
+ void coalesce(Selection &selection, SelectionVector *vector);
+ /*! The context owns the register allocator */
+ GenContext &ctx;
+ /*! Map virtual registers to offset in the (physical) register file */
+ map<ir::Register, uint32_t> RA;
+ /*! Provides the position of each register in a vector */
+ map<ir::Register, VectorLocation> vectorMap;
+ /*! All vectors used in the selection */
+ vector<SelectionVector*> vectors;
+ /*! All vectors that are already expired */
+ set<SelectionVector*> expired;
+ /*! The set of booleans that will go to GRF (cannot be kept into flags) */
+ set<ir::Register> grfBooleans;
+ /*! All the register intervals */
+ vector<GenRegInterval> intervals;
+ /*! Intervals sorted by starting point position */
+ vector<GenRegInterval*> starting;
+ /*! Intervals sorted by ending point position */
+ vector<GenRegInterval*> ending;
+ /*! Current vector to expire */
+ uint32_t expiringID;
+ /*! Use custom allocator */
+ GBE_CLASS(Opaque);
+ };
+
+ // Note that byte vector registers use two bytes of register space per
+ // byte element (they can be interleaved)
+ static const size_t familyVectorSize[] = {2,2,2,4,8};
+ static const size_t familyScalarSize[] = {2,1,2,4,8};
+
+ /*! Interval as used in linear scan allocator. Basically, stores the first and
+ * the last instruction where the register is alive
+ */
+ struct GenRegInterval {
+ INLINE GenRegInterval(ir::Register reg) :
+ reg(reg), minID(INT_MAX), maxID(-INT_MAX) {}
+ ir::Register reg; //!< (virtual) register of the interval
+ int32_t minID, maxID; //!< Starting and ending points
+ };
+
+ GenRegAllocator::Opaque::Opaque(GenContext &ctx) : ctx(ctx) {}
+ GenRegAllocator::Opaque::~Opaque(void) {}
+
+ void GenRegAllocator::Opaque::allocatePayloadReg(gbe_curbe_type value,
+ ir::Register reg,
+ uint32_t subValue,
+ uint32_t subOffset)
+ {
+ using namespace ir;
+ const Kernel *kernel = ctx.getKernel();
+ const int32_t curbeOffset = kernel->getCurbeOffset(value, subValue);
+ if (curbeOffset >= 0) {
+ const uint32_t offset = GEN_REG_SIZE + curbeOffset + subOffset;
+ RA.insert(std::make_pair(reg, offset));
+ this->intervals[reg].minID = 0;
+ }
+ }
+
+ bool GenRegAllocator::Opaque::createGenReg(const GenRegInterval &interval) {
+ using namespace ir;
+ const ir::Register reg = interval.reg;
+ const uint32_t simdWidth = ctx.getSimdWidth();
+ if (RA.contains(reg) == true)
+ return true; // already allocated
+ GBE_ASSERT(ctx.isScalarReg(reg) == false);
+ const bool isScalar = ctx.sel->isScalarOrBool(reg);
+ const RegisterData regData = ctx.sel->getRegisterData(reg);
+ const RegisterFamily family = regData.family;
+ const uint32_t typeSize = isScalar ? familyScalarSize[family] : familyVectorSize[family];
+ const uint32_t regSize = isScalar ? typeSize : simdWidth*typeSize;
+ uint32_t grfOffset;
+ while ((grfOffset = ctx.allocate(regSize, regSize)) == 0) {
+ const bool success = this->expireGRF(interval);
+ if (UNLIKELY(success == false)) return false;
+ }
+ GBE_ASSERTM(grfOffset != 0, "Unable to register allocate");
+ RA.insert(std::make_pair(reg, grfOffset));
+ return true;
+ }
+
+ bool GenRegAllocator::Opaque::isAllocated(const SelectionVector *vector) const {
+ const ir::Register first = vector->reg[0].reg();
+ const auto it = vectorMap.find(first);
+
+ // If the first register is not allocated we are done
+ if (it == vectorMap.end())
+ return false;
+
+ // If the current vector has more registers than remain in the found
+ // vector, there are still registers to allocate
+ const SelectionVector *other = it->second.first;
+ const uint32_t otherFirst = it->second.second;
+ const uint32_t leftNum = other->regNum - otherFirst;
+ if (leftNum < vector->regNum)
+ return false;
+
+ // Now check that all the registers in the already allocated vector match
+ // the current vector
+ for (uint32_t regID = 1; regID < vector->regNum; ++regID) {
+ const ir::Register from = vector->reg[regID].reg();
+ const ir::Register to = other->reg[regID + otherFirst].reg();
+ if (from != to)
+ return false;
+ }
+ return true;
+ }
+
+ void GenRegAllocator::Opaque::coalesce(Selection &selection, SelectionVector *vector) {
+ for (uint32_t regID = 0; regID < vector->regNum; ++regID) {
+ const ir::Register reg = vector->reg[regID].reg();
+ const auto it = this->vectorMap.find(reg);
+ // case 1: the register is not already in a vector, so it can stay in this
+ // vector. Note that local IDs are *non-scalar* special registers but will
+ // require a MOV anyway since they are pre-allocated in the CURBE
+ if (it == vectorMap.end() &&
+ ctx.sel->isScalarOrBool(reg) == false &&
+ ctx.isSpecialReg(reg) == false)
+ {
+ const VectorLocation location = std::make_pair(vector, regID);
+ this->vectorMap.insert(std::make_pair(reg, location));
+ }
+ // case 2: the register is already in another vector, so we need to move
+ // it to a temporary register.
+ // TODO: we can do better than that if we analyze the liveness of the
+ // already allocated registers in the vector. If there is no interference
+ // and the order is maintained, we can reuse the previous vector and avoid
+ // the MOVs
+ else {
+ ir::Register tmp;
+ if (vector->isSrc)
+ tmp = selection.replaceSrc(vector->insn, regID);
+ else
+ tmp = selection.replaceDst(vector->insn, regID);
+ const VectorLocation location = std::make_pair(vector, regID);
+ this->vectorMap.insert(std::make_pair(tmp, location));
+ }
+ }
+ }
+
+ /*! Sorts vectors by decreasing register count */
+ inline bool cmp(const SelectionVector *v0, const SelectionVector *v1) {
+ return v0->regNum > v1->regNum;
+ }
+
+ void GenRegAllocator::Opaque::allocateVector(Selection &selection) {
+ const uint32_t vectorNum = selection.getVectorNum();
+ this->vectors.resize(vectorNum);
+
+ // First we find and store all vectors
+ uint32_t vectorID = 0;
+ for (auto &block : *selection.blockList)
+ for (auto &v : block.vectorList)
+ this->vectors[vectorID++] = &v;
+ GBE_ASSERT(vectorID == vectorNum);
+
+ // Heuristic (really simple...): sort them by the number of registers they
+ // contain
+ std::sort(this->vectors.begin(), this->vectors.end(), cmp);
+
+ // Insert MOVs when this is required
+ for (vectorID = 0; vectorID < vectorNum; ++vectorID) {
+ SelectionVector *vector = this->vectors[vectorID];
+ if (this->isAllocated(vector))
+ continue;
+ this->coalesce(selection, vector);
+ }
+ }
+
+ template <bool sortStartingPoint>
+ inline bool cmp(const GenRegInterval *i0, const GenRegInterval *i1) {
+ return sortStartingPoint ? i0->minID < i1->minID : i0->maxID < i1->maxID;
+ }
+
+ bool GenRegAllocator::Opaque::expireGRF(const GenRegInterval &limit) {
+ while (this->expiringID != ending.size()) {
+ const GenRegInterval *toExpire = this->ending[this->expiringID];
+ const ir::Register reg = toExpire->reg;
+
+ // Dead code produced by the insn selection -> we skip it
+ if (toExpire->minID > toExpire->maxID) {
+ this->expiringID++;
+ continue;
+ }
+
+ // Ignore booleans that were allocated with flags
+ // if (ctx.getRegisterFamily(reg) == ir::FAMILY_BOOL && !grfBooleans.contains(reg)) {
+ if (ctx.sel->getRegisterFamily(reg) == ir::FAMILY_BOOL) {
+ this->expiringID++;
+ continue;
+ }
+
+ if (toExpire->maxID >= limit.minID)
+ return false;
+ auto it = RA.find(reg);
+ GBE_ASSERT(it != RA.end());
+
+ // Case 1 - it does not belong to a vector. Just remove it
+ if (vectorMap.contains(reg) == false) {
+ ctx.deallocate(it->second);
+ this->expiringID++;
+ return true;
+ // Case 2 - check that the vector has not already been removed. If not,
+ // since we made the intervals of all registers in the vector equal, we
+ // just remove the complete vector
+ } else {
+ SelectionVector *vector = vectorMap.find(reg)->second.first;
+ if (expired.contains(vector)) {
+ this->expiringID++;
+ continue;
+ } else {
+ const ir::Register first = vector->reg[0].reg();
+ auto it = RA.find(first);
+ GBE_ASSERT(it != RA.end());
+ ctx.deallocate(it->second);
+ expired.insert(vector);
+ this->expiringID++;
+ return true;
+ }
+ }
+ }
+
+ // We were not able to expire anything
+ return false;
+ }
+
+ void GenRegAllocator::Opaque::allocateFlags(Selection &selection) {
+
+ // Store the registers allocated in the map
+ map<ir::Register, uint32_t> allocatedFlags;
+ GenRegInterval spill = ir::Register(ir::RegisterFile::MAX_INDEX);
+
+ // We have two flags to use for booleans: f1.0 and f1.1
+ const uint32_t flagNum = 2;
+ uint32_t freeFlags[] = {0,1};
+ uint32_t freeNum = flagNum;
+
+ // Perform the linear scan allocator on the flag registers only. We only use
+ // two flag registers for the booleans right now: f1.0 and f1.1
+ const uint32_t regNum = ctx.sel->getRegNum();
+ uint32_t endID = 0; // interval to expire
+ for (uint32_t startID = 0; startID < regNum; ++startID) {
+ const GenRegInterval &interval = *this->starting[startID];
+ const ir::Register reg = interval.reg;
+ if (ctx.sel->getRegisterFamily(reg) != ir::FAMILY_BOOL)
+ continue; // Not a flag. We don't care
+ if (grfBooleans.contains(reg))
+ continue; // Cannot use a flag register
+ if (interval.maxID == -INT_MAX)
+ continue; // Unused register
+ if (freeNum != 0) {
+ spill = interval;
+ allocatedFlags.insert(std::make_pair(reg, freeFlags[--freeNum]));
+ }
+ else {
+ // Try to expire one register
+ while (endID != ending.size()) {
+ const GenRegInterval *toExpire = this->ending[endID];
+ const ir::Register reg = toExpire->reg;
+ // Dead code produced by the insn selection -> we skip it
+ if (toExpire->minID > toExpire->maxID) {
+ endID++;
+ continue;
+ }
+ // We cannot expire this interval and the next ones
+ if (toExpire->maxID >= interval.minID)
+ break;
+ // Must be a boolean allocated with a flag register
+ if (ctx.sel->getRegisterFamily(reg) != ir::FAMILY_BOOL || grfBooleans.contains(reg)) {
+ endID++;
+ continue;
+ }
+ // We reuse a flag from a previous interval (the oldest one)
+ auto it = allocatedFlags.find(toExpire->reg);
+ GBE_ASSERT(it != allocatedFlags.end());
+ freeFlags[freeNum++] = it->second;
+ endID++;
+ break;
+ }
+
+ // We need to spill one of the previous boolean values
+ if (freeNum == 0) {
+ GBE_ASSERT(uint16_t(spill.reg) != ir::RegisterFile::MAX_INDEX);
+ // We spill the last inserted boolean and use its flag instead for
+ // this one
+ if (spill.maxID > interval.maxID) {
+ auto it = allocatedFlags.find(spill.reg);
+ GBE_ASSERT(it != allocatedFlags.end());
+ allocatedFlags.insert(std::make_pair(reg, it->second));
+ allocatedFlags.erase(spill.reg);
+ grfBooleans.insert(spill.reg);
+ spill = interval;
+ }
+ // We will use a GRF for the current register
+ else
+ grfBooleans.insert(reg);
+ }
+ else
+ allocatedFlags.insert(std::make_pair(reg, freeFlags[--freeNum]));
+ }
+ }
+
+ // Now, we traverse all the selection instructions and we patch them to make
+ // them use flag registers
+ for (auto &block : *selection.blockList)
+ for (auto &insn : block.insnList) {
+ const uint32_t srcNum = insn.srcNum, dstNum = insn.dstNum;
+
+ // Patch the source booleans
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+ const GenRegister selReg = insn.src(srcID);
+ const ir::Register reg = selReg.reg();
+ if (selReg.physical || ctx.sel->getRegisterFamily(reg) != ir::FAMILY_BOOL)
+ continue;
+ auto it = allocatedFlags.find(reg);
+ if (it == allocatedFlags.end())
+ continue;
+ // Use a flag register for it now
+ insn.src(srcID) = GenRegister::flag(1,it->second);
+ }
+
+ // Patch the destination booleans
+ for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
+ const GenRegister selReg = insn.dst(dstID);
+ const ir::Register reg = selReg.reg();
+ if (selReg.physical || ctx.sel->getRegisterFamily(reg) != ir::FAMILY_BOOL)
+ continue;
+ auto it = allocatedFlags.find(reg);
+ if (it == allocatedFlags.end())
+ continue;
+ // Use a flag register for it now
+ insn.dst(dstID) = GenRegister::flag(1,it->second);
+ }
+
+ // Patch the predicate now. Note that only compares actually modify it (it
+ // is called a "conditional modifier"). The other instructions just read
+ // it
+ if (insn.state.physicalFlag == 0) {
+ auto it = allocatedFlags.find(ir::Register(insn.state.flagIndex));
+ // Just patch it if we can use a flag directly
+ if (it != allocatedFlags.end()) {
+ insn.state.flag = 1;
+ insn.state.subFlag = it->second;
+ insn.state.physicalFlag = 1;
+ }
+ // When we leave the boolean in a GRF, use f0.1 as a temporary
+ else {
+ // Mov the GRF to the flag such that the flag can be read
+ SelectionInstruction *mov0 = selection.create(SEL_OP_MOV,1,1);
+ mov0->state = GenInstructionState(1);
+ mov0->state.predicate = GEN_PREDICATE_NONE;
+ mov0->state.noMask = 1;
+ mov0->src(0) = GenRegister::uw1grf(ir::Register(insn.state.flagIndex));
+ mov0->dst(0) = GenRegister::flag(0,1);
+
+ // Do not prepend if the flag is not read (== used only as a
+ // conditional modifier)
+ if (insn.state.predicate != GEN_PREDICATE_NONE)
+ insn.prepend(*mov0);
+
+ // We can use f0.1 (our "backdoor" flag)
+ insn.state.flag = 0;
+ insn.state.subFlag = 1;
+ insn.state.physicalFlag = 1;
+
+ // Compare instructions update the flags so we must copy it back to
+ // the GRF
+ if (insn.opcode == SEL_OP_CMP) {
+ SelectionInstruction *mov1 = selection.create(SEL_OP_MOV,1,1);
+ mov1->state = mov0->state;
+ mov1->dst(0) = mov0->src(0);
+ mov1->src(0) = mov0->dst(0);
+ insn.append(*mov1);
+ }
+ }
+ }
+ }
+ }
+
+ bool GenRegAllocator::Opaque::allocateGRFs(Selection &selection) {
+
+ // Perform the linear scan allocator
+ const uint32_t regNum = ctx.sel->getRegNum();
+ for (uint32_t startID = 0; startID < regNum; ++startID) {
+ const GenRegInterval &interval = *this->starting[startID];
+ const ir::Register reg = interval.reg;
+ if (interval.maxID == -INT_MAX)
+ continue; // Unused register
+ if (RA.contains(reg))
+ continue; // already allocated
+
+ // Case 1: the register belongs to a vector, allocate all the registers in
+ // one piece
+ auto it = vectorMap.find(reg);
+ if (it != vectorMap.end()) {
+ const SelectionVector *vector = it->second.first;
+ const uint32_t simdWidth = ctx.getSimdWidth();
+ const uint32_t alignment = simdWidth * sizeof(uint32_t);
+ const uint32_t size = vector->regNum * alignment;
+ uint32_t grfOffset;
+ while ((grfOffset = ctx.allocate(size, alignment)) == 0) {
+ const bool success = this->expireGRF(interval);
+ if (success == false) return false;
+ }
+ for (uint32_t regID = 0; regID < vector->regNum; ++regID, grfOffset += alignment) {
+ const ir::Register reg = vector->reg[regID].reg();
+ GBE_ASSERT(RA.contains(reg) == false);
+ RA.insert(std::make_pair(reg, grfOffset));
+ }
+ }
+ // Case 2: This is a regular scalar register, allocate it alone
+ else if (this->createGenReg(interval) == false)
+ return false;
+ }
+ return true;
+ }
+
+ INLINE bool GenRegAllocator::Opaque::allocate(Selection &selection) {
+ using namespace ir;
+ const Kernel *kernel = ctx.getKernel();
+ const Function &fn = ctx.getFunction();
+ GBE_ASSERT(fn.getProfile() == PROFILE_OCL);
+
+ // Allocate all the vectors first since they need to be contiguous
+ this->allocateVector(selection);
+ // schedulePreRegAllocation(ctx, selection);
+
+ // Now start the linear scan allocation
+ for (uint32_t regID = 0; regID < ctx.sel->getRegNum(); ++regID)
+ this->intervals.push_back(ir::Register(regID));
+
+ // Allocate the special registers (only those which are actually used)
+ allocatePayloadReg(GBE_CURBE_LOCAL_ID_X, ocl::lid0);
+ allocatePayloadReg(GBE_CURBE_LOCAL_ID_Y, ocl::lid1);
+ allocatePayloadReg(GBE_CURBE_LOCAL_ID_Z, ocl::lid2);
+ allocatePayloadReg(GBE_CURBE_LOCAL_SIZE_X, ocl::lsize0);
+ allocatePayloadReg(GBE_CURBE_LOCAL_SIZE_Y, ocl::lsize1);
+ allocatePayloadReg(GBE_CURBE_LOCAL_SIZE_Z, ocl::lsize2);
+ allocatePayloadReg(GBE_CURBE_GLOBAL_SIZE_X, ocl::gsize0);
+ allocatePayloadReg(GBE_CURBE_GLOBAL_SIZE_Y, ocl::gsize1);
+ allocatePayloadReg(GBE_CURBE_GLOBAL_SIZE_Z, ocl::gsize2);
+ allocatePayloadReg(GBE_CURBE_GLOBAL_OFFSET_X, ocl::goffset0);
+ allocatePayloadReg(GBE_CURBE_GLOBAL_OFFSET_Y, ocl::goffset1);
+ allocatePayloadReg(GBE_CURBE_GLOBAL_OFFSET_Z, ocl::goffset2);
+ allocatePayloadReg(GBE_CURBE_GROUP_NUM_X, ocl::numgroup0);
+ allocatePayloadReg(GBE_CURBE_GROUP_NUM_Y, ocl::numgroup1);
+ allocatePayloadReg(GBE_CURBE_GROUP_NUM_Z, ocl::numgroup2);
+ allocatePayloadReg(GBE_CURBE_STACK_POINTER, ocl::stackptr);
+ allocatePayloadReg(GBE_CURBE_THREAD_NUM, ocl::threadn);
+
+ // Group and barrier IDs are always allocated by the hardware in r0
+ RA.insert(std::make_pair(ocl::groupid0, 1*sizeof(float))); // r0.1
+ RA.insert(std::make_pair(ocl::groupid1, 6*sizeof(float))); // r0.6
+ RA.insert(std::make_pair(ocl::groupid2, 7*sizeof(float))); // r0.7
+ RA.insert(std::make_pair(ocl::barrierid, 2*sizeof(float))); // r0.2
+
+ // The block IP used to handle the mask in SW is always allocated
+ const int32_t blockIPOffset = GEN_REG_SIZE + kernel->getCurbeOffset(GBE_CURBE_BLOCK_IP,0);
+ GBE_ASSERT(blockIPOffset >= 0 && blockIPOffset % GEN_REG_SIZE == 0);
+ RA.insert(std::make_pair(ocl::blockip, blockIPOffset));
+ this->intervals[ocl::blockip].minID = 0;
+
+ // Allocate all (non-structure) argument parameters
+ const uint32_t argNum = fn.argNum();
+ for (uint32_t argID = 0; argID < argNum; ++argID) {
+ const FunctionArgument &arg = fn.getArg(argID);
+ GBE_ASSERT(arg.type == FunctionArgument::GLOBAL_POINTER ||
+ arg.type == FunctionArgument::CONSTANT_POINTER ||
+ arg.type == FunctionArgument::LOCAL_POINTER ||
+ arg.type == FunctionArgument::VALUE ||
+ arg.type == FunctionArgument::STRUCTURE);
+ allocatePayloadReg(GBE_CURBE_KERNEL_ARGUMENT, arg.reg, argID);
+ }
+
+ // Allocate all pushed registers (i.e. structure kernel arguments)
+ const Function::PushMap &pushMap = fn.getPushMap();
+ for (const auto &pushed : pushMap) {
+ const uint32_t argID = pushed.second.argID;
+ const uint32_t subOffset = pushed.second.offset;
+ const Register reg = pushed.second.getRegister();
+ allocatePayloadReg(GBE_CURBE_KERNEL_ARGUMENT, reg, argID, subOffset);
+ }
+
+ // Compute the intervals
+ int32_t insnID = 0;
+ for (auto &block : *selection.blockList) {
+ int32_t lastID = insnID;
+ // Update the intervals of each used register. Note that we do not
+ // register allocate R0, so we skip all sub-registers in r0
+ for (auto &insn : block.insnList) {
+ const uint32_t srcNum = insn.srcNum, dstNum = insn.dstNum;
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+ const GenRegister &selReg = insn.src(srcID);
+ const ir::Register reg = selReg.reg();
+ if (selReg.file != GEN_GENERAL_REGISTER_FILE ||
+ reg == ir::ocl::barrierid ||
+ reg == ir::ocl::groupid0 ||
+ reg == ir::ocl::groupid1 ||
+ reg == ir::ocl::groupid2)
+ continue;
+ this->intervals[reg].minID = std::min(this->intervals[reg].minID, insnID);
+ this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, insnID);
+ }
+ for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
+ const GenRegister &selReg = insn.dst(dstID);
+ const ir::Register reg = selReg.reg();
+ if (selReg.file != GEN_GENERAL_REGISTER_FILE ||
+ reg == ir::ocl::barrierid ||
+ reg == ir::ocl::groupid0 ||
+ reg == ir::ocl::groupid1 ||
+ reg == ir::ocl::groupid2)
+ continue;
+ this->intervals[reg].minID = std::min(this->intervals[reg].minID, insnID);
+ this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, insnID);
+ }
+
+ // Flag registers can only go to src[0]
+ const SelectionOpcode opcode = SelectionOpcode(insn.opcode);
+ if (opcode == SEL_OP_AND || opcode == SEL_OP_OR) {
+ if (insn.src(1).physical == 0) {
+ const ir::Register reg = insn.src(1).reg();
+ if (ctx.sel->getRegisterFamily(reg) == ir::FAMILY_BOOL)
+ grfBooleans.insert(reg);
+ }
+ }
+
+ // OK, a flag is used as a predicate or a conditional modifier
+ if (insn.state.physicalFlag == 0) {
+ const ir::Register reg = ir::Register(insn.state.flagIndex);
+ this->intervals[reg].minID = std::min(this->intervals[reg].minID, insnID);
+ this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, insnID);
+ }
+ lastID = insnID;
+ insnID++;
+ }
+
+ // All registers alive at the end of the block must have their intervals
+ // updated as well
+ const ir::BasicBlock *bb = block.bb;
+ const ir::Liveness::LiveOut &liveOut = ctx.getLiveOut(bb);
+ for (auto reg : liveOut) {
+ this->intervals[reg].minID = std::min(this->intervals[reg].minID, lastID);
+ this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, lastID);
+ }
+ }
+
+ // Extend the liveness of the registers that belong to vectors. Actually,
+ // this is way too brutal, we should instead maintain a list of allocated
+ // intervals to handle vector registers independently while doing the linear
+ // scan (or anything else)
+ for (auto vector : this->vectors) {
+ const uint32_t regNum = vector->regNum;
+ const ir::Register first = vector->reg[0].reg();
+ int32_t minID = this->intervals[first].minID;
+ int32_t maxID = this->intervals[first].maxID;
+ for (uint32_t regID = 1; regID < regNum; ++regID) {
+ const ir::Register reg = vector->reg[regID].reg();
+ minID = std::min(minID, this->intervals[reg].minID);
+ maxID = std::max(maxID, this->intervals[reg].maxID);
+ }
+ for (uint32_t regID = 0; regID < regNum; ++regID) {
+ const ir::Register reg = vector->reg[regID].reg();
+ this->intervals[reg].minID = minID;
+ this->intervals[reg].maxID = maxID;
+ }
+ }
+
+ // Sort both interval lists: by increasing starting point and by increasing ending point
+ const uint32_t regNum = ctx.sel->getRegNum();
+ this->starting.resize(regNum);
+ this->ending.resize(regNum);
+ for (uint32_t regID = 0; regID < regNum; ++regID)
+ this->starting[regID] = this->ending[regID] = &intervals[regID];
+ std::sort(this->starting.begin(), this->starting.end(), cmp<true>);
+ std::sort(this->ending.begin(), this->ending.end(), cmp<false>);
+
+ // Skip the intervals of registers that are never used
+ this->expiringID = 0;
+ while (this->expiringID < regNum) {
+ const GenRegInterval *interval = ending[this->expiringID];
+ if (interval->maxID == -INT_MAX)
+ this->expiringID++;
+ else
+ break;
+ }
+
+ // First we try to put all boolean registers into flags
+ this->allocateFlags(selection);
+
+ // Allocate all the GRFs now (regular register and boolean that are not in
+ // flag registers)
+ return this->allocateGRFs(selection);
+ }
+
+ INLINE GenRegister setGenReg(const GenRegister &src, uint32_t grfOffset) {
+ GenRegister dst;
+ dst = src;
+ dst.physical = 1;
+ dst.nr = grfOffset / GEN_REG_SIZE;
+ dst.subnr = grfOffset % GEN_REG_SIZE;
+ return dst;
+ }
+
+ INLINE GenRegister GenRegAllocator::Opaque::genReg(const GenRegister ®) {
+ if (reg.file == GEN_GENERAL_REGISTER_FILE) {
+ GBE_ASSERT(RA.contains(reg.reg()) != false);
+ const uint32_t grfOffset = RA.find(reg.reg())->second;
+ const GenRegister dst = setGenReg(reg, grfOffset);
+ if (reg.quarter != 0)
+ return GenRegister::Qn(dst, reg.quarter);
+ else
+ return dst;
+ }
+ else
+ return reg;
+ }
+
+ /////////////////////////////////////////////////////////////////////////////
+ // Register allocator public implementation
+ /////////////////////////////////////////////////////////////////////////////
+
+ GenRegAllocator::GenRegAllocator(GenContext &ctx) {
+ this->opaque = GBE_NEW(GenRegAllocator::Opaque, ctx);
+ }
+
+ GenRegAllocator::~GenRegAllocator(void) {
+ GBE_DELETE(this->opaque);
+ }
+
+ bool GenRegAllocator::allocate(Selection &selection) {
+ return this->opaque->allocate(selection);
+ }
+
+ GenRegister GenRegAllocator::genReg(const GenRegister ®) {
+ return this->opaque->genReg(reg);
+ }
+
+} /* namespace gbe */
+
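
The allocator above is a linear-scan allocator specialized for Gen (payload
registers, vectors, flag registers). As a reference for the core idea, here is
a compact, self-contained sketch under the simplifying assumption of a single
scalar register file and no vectors: intervals are visited by increasing start
point, and any active interval that ends before the current start is expired to
free its slot. All names below are illustrative, not the backend's.

    #include <algorithm>
    #include <climits>
    #include <map>
    #include <utility>
    #include <vector>

    struct Interval { int reg; int minID; int maxID; };

    // Return reg -> slot, or an empty map if we run out of slots (i.e. a spill
    // would be required, which the backend above refuses to do).
    std::map<int, int> linearScan(std::vector<Interval> intervals, int slotNum) {
      std::sort(intervals.begin(), intervals.end(),
                [](const Interval &a, const Interval &b) { return a.minID < b.minID; });
      std::map<int, int> assignment;            // reg -> slot
      std::vector<std::pair<int, int>> active;  // (maxID, slot), sorted by maxID
      std::vector<int> freeSlots;
      for (int s = 0; s < slotNum; ++s) freeSlots.push_back(s);

      for (const Interval &it : intervals) {
        if (it.maxID == -INT_MAX) continue;     // unused register, skip it
        // Expire every active interval that ends before this one starts.
        while (!active.empty() && active.front().first < it.minID) {
          freeSlots.push_back(active.front().second);
          active.erase(active.begin());
        }
        if (freeSlots.empty()) return {};       // no slot left: would need to spill
        const int slot = freeSlots.back();
        freeSlots.pop_back();
        assignment[it.reg] = slot;
        active.emplace_back(it.maxID, slot);
        std::sort(active.begin(), active.end());
      }
      return assignment;
    }
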
diff --git a/backend/src/backend/gen_reg_allocation.hpp b/backend/src/backend/gen_reg_allocation.hpp
new file mode 100644
index 0000000..b172995
--- /dev/null
+++ b/backend/src/backend/gen_reg_allocation.hpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file gen_reg_allocation.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __GBE_GEN_REG_ALLOCATION_HPP__
+#define __GBE_GEN_REG_ALLOCATION_HPP__
+
+#include "ir/register.hpp"
+#include "backend/gen_register.hpp"
+
+namespace gbe
+{
+ class Selection; // Pre-register allocation code generation
+ class GenRegister; // Pre-register allocation Gen register
+ class GenRegInterval; // Liveness interval for each register
+ class GenContext; // Gen specific context
+
+ /*! Register allocator (i.e. virtual to physical register mapping) */
+ class GenRegAllocator
+ {
+ public:
+ /*! Initialize the register allocator */
+ GenRegAllocator(GenContext &ctx);
+ /*! Release all taken resources */
+ ~GenRegAllocator(void);
+ /*! Perform the register allocation */
+ bool allocate(Selection &selection);
+ /*! Virtual to physical translation */
+ GenRegister genReg(const GenRegister ®);
+ private:
+ /*! Actual implementation of the register allocator (use Pimpl) */
+ class Opaque;
+ /*! Created and destroyed in cpp */
+ Opaque *opaque;
+ /*! Use custom allocator */
+ GBE_CLASS(GenRegAllocator);
+ };
+
+} /* namespace gbe */
+
+#endif /* __GBE_GEN_REG_ALLOCATION_HPP__ */
+
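
The header above keeps the allocator behind the Pimpl idiom: only the Opaque
forward declaration is visible and the implementation lives entirely in the
.cpp file. A minimal self-contained sketch of that idiom, with illustrative
Widget/Impl names rather than the real classes:

    // Public-facing class: only a forward declaration of Impl is visible,
    // mirroring GenRegAllocator::Opaque above.
    class Widget {
    public:
      Widget(void);
      ~Widget(void);
      bool run(void);
    private:
      class Impl;   // defined below; normally it lives in the .cpp file
      Impl *impl;
    };

    // "The .cpp side": the implementation stays out of the public header.
    class Widget::Impl {
    public:
      bool run(void) { return true; }
    };

    Widget::Widget(void) : impl(new Impl) {}
    Widget::~Widget(void) { delete impl; }
    bool Widget::run(void) { return impl->run(); }
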
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
new file mode 100644
index 0000000..7b91efb
--- /dev/null
+++ b/backend/src/backend/gen_register.hpp
@@ -0,0 +1,787 @@
+/*
+ * Copyright 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith at tungstengraphics.com>
+ */
+/**
+ * \file gen_register.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __GEN_REGISTER_HPP__
+#define __GEN_REGISTER_HPP__
+
+#include "backend/gen_defs.hpp"
+#include "ir/register.hpp"
+#include "sys/platform.hpp"
+
+namespace gbe
+{
+
+ /*! Type size in bytes for each Gen type */
+ INLINE int typeSize(uint32_t type) {
+ switch(type) {
+ case GEN_TYPE_UD:
+ case GEN_TYPE_D:
+ case GEN_TYPE_F:
+ return 4;
+ case GEN_TYPE_HF:
+ case GEN_TYPE_UW:
+ case GEN_TYPE_W:
+ return 2;
+ case GEN_TYPE_UB:
+ case GEN_TYPE_B:
+ return 1;
+ default:
+ assert(0);
+ return 0;
+ }
+ }
+
+ /*! Convert a hstride to a number of elements */
+ INLINE uint32_t stride(uint32_t stride) {
+ switch (stride) {
+ case 0: return 0;
+ case 1: return 1;
+ case 2: return 2;
+ case 3: return 4;
+ case 4: return 8;
+ case 5: return 16;
+ default: assert(0); return 0;
+ }
+ }
+
+ /*! Encode the instruction state. Note that the flag register can be either
+ * physical (i.e. a real Gen flag) or a virtual boolean register. The flag
+ * register allocation will turn all virtual boolean registers into flag
+ * registers
+ */
+ class GenInstructionState
+ {
+ public:
+ INLINE GenInstructionState(uint32_t simdWidth = 8) {
+ this->execWidth = simdWidth;
+ this->quarterControl = GEN_COMPRESSION_Q1;
+ this->accWrEnable = 0;
+ this->noMask = 0;
+ this->flag = 0;
+ this->subFlag = 0;
+ this->predicate = GEN_PREDICATE_NORMAL;
+ this->inversePredicate = 0;
+ this->physicalFlag = 1;
+ this->flagIndex = 0;
+ }
+ uint32_t physicalFlag:1; //!< Physical or virtual flag register
+ uint32_t flag:1; //!< Only if physical flag
+ uint32_t subFlag:1; //!< Only if physical flag
+ uint32_t flagIndex:16; //!< Only if virtual flag (index of the register)
+ uint32_t execWidth:5;
+ uint32_t quarterControl:1;
+ uint32_t accWrEnable:1;
+ uint32_t noMask:1;
+ uint32_t predicate:4;
+ uint32_t inversePredicate:1;
+ };
+
+ static_assert(sizeof(GenInstructionState) == sizeof(uint32_t), "Invalid state size");
+
+ /*! This is a book-keeping structure used to encode both virtual and physical
+ * registers
+ */
+ class GenRegister
+ {
+ public:
+ /*! Empty constructor */
+ INLINE GenRegister(void) {}
+
+ /*! General constructor */
+ INLINE GenRegister(uint32_t file,
+ ir::Register reg,
+ uint32_t type,
+ uint32_t vstride,
+ uint32_t width,
+ uint32_t hstride)
+ {
+ this->type = type;
+ this->file = file;
+ this->physical = 0;
+ this->value.reg = reg;
+ this->negation = 0;
+ this->absolute = 0;
+ this->vstride = vstride;
+ this->width = width;
+ this->hstride = hstride;
+ this->quarter = 0;
+ this->nr = this->subnr = 0;
+ this->address_mode = GEN_ADDRESS_DIRECT;
+ }
+
+ /*! For specific physical registers only */
+ INLINE GenRegister(uint32_t file,
+ uint32_t nr,
+ uint32_t subnr,
+ uint32_t type,
+ uint32_t vstride,
+ uint32_t width,
+ uint32_t hstride)
+ {
+ this->type = type;
+ this->file = file;
+ this->nr = nr;
+ this->physical = 1;
+ this->subnr = subnr * typeSize(type);
+ this->negation = 0;
+ this->absolute = 0;
+ this->vstride = vstride;
+ this->width = width;
+ this->hstride = hstride;
+ this->quarter = 0;
+ this->address_mode = GEN_ADDRESS_DIRECT;
+ }
+
+ /*! Return the IR virtual register */
+ INLINE ir::Register reg(void) const { return ir::Register(value.reg); }
+
+ /*! For immediates or virtual register */
+ union {
+ float f;
+ int32_t d;
+ uint32_t ud;
+ uint16_t reg;
+ } value;
+
+ uint32_t nr:8; //!< Just for some physical registers (acc, null)
+ uint32_t subnr:8; //!< Idem
+ uint32_t physical:1; //!< 1 if physical, 0 otherwise
+ uint32_t type:4; //!< Gen type
+ uint32_t file:2; //!< Register file
+ uint32_t negation:1; //!< For source
+ uint32_t absolute:1; //!< For source
+ uint32_t vstride:4; //!< Vertical stride
+ uint32_t width:3; //!< Width
+ uint32_t hstride:2; //!< Horizontal stride
+ uint32_t quarter:1; //!< To choose which part we want (Q1 / Q2)
+ uint32_t address_mode:1; //!< direct or indirect
+
+ static INLINE GenRegister QnVirtual(GenRegister reg, uint32_t quarter) {
+ GBE_ASSERT(reg.physical == 0);
+ if (reg.hstride == GEN_HORIZONTAL_STRIDE_0) // scalar register
+ return reg;
+ else {
+ reg.quarter = quarter;
+ return reg;
+ }
+ }
+
+ static INLINE GenRegister QnPhysical(GenRegister reg, uint32_t quarter) {
+ GBE_ASSERT(reg.physical);
+ if (reg.hstride == GEN_HORIZONTAL_STRIDE_0) // scalar register
+ return reg;
+ else {
+ const uint32_t typeSz = typeSize(reg.type);
+ const uint32_t horizontal = stride(reg.hstride);
+ const uint32_t grfOffset = reg.nr*GEN_REG_SIZE + reg.subnr;
+ const uint32_t nextOffset = grfOffset + 8*quarter*horizontal*typeSz;
+ reg.nr = nextOffset / GEN_REG_SIZE;
+ reg.subnr = (nextOffset % GEN_REG_SIZE);
+ return reg;
+ }
+ }
+
+ static INLINE GenRegister Qn(GenRegister reg, uint32_t quarter) {
+ if (reg.physical)
+ return QnPhysical(reg, quarter);
+ else
+ return QnVirtual(reg, quarter);
+ }
+
+ static INLINE GenRegister vec16(uint32_t file, ir::Register reg) {
+ return GenRegister(file,
+ reg,
+ GEN_TYPE_F,
+ GEN_VERTICAL_STRIDE_8,
+ GEN_WIDTH_8,
+ GEN_HORIZONTAL_STRIDE_1);
+ }
+
+ static INLINE GenRegister vec8(uint32_t file, ir::Register reg) {
+ return GenRegister(file,
+ reg,
+ GEN_TYPE_F,
+ GEN_VERTICAL_STRIDE_8,
+ GEN_WIDTH_8,
+ GEN_HORIZONTAL_STRIDE_1);
+ }
+
+ static INLINE GenRegister vec4(uint32_t file, ir::Register reg) {
+ return GenRegister(file,
+ reg,
+ GEN_TYPE_F,
+ GEN_VERTICAL_STRIDE_4,
+ GEN_WIDTH_4,
+ GEN_HORIZONTAL_STRIDE_1);
+ }
+
+ static INLINE GenRegister vec2(uint32_t file, ir::Register reg) {
+ return GenRegister(file,
+ reg,
+ GEN_TYPE_F,
+ GEN_VERTICAL_STRIDE_2,
+ GEN_WIDTH_2,
+ GEN_HORIZONTAL_STRIDE_1);
+ }
+
+ static INLINE GenRegister vec1(uint32_t file, ir::Register reg) {
+ return GenRegister(file,
+ reg,
+ GEN_TYPE_F,
+ GEN_VERTICAL_STRIDE_0,
+ GEN_WIDTH_1,
+ GEN_HORIZONTAL_STRIDE_0);
+ }
+
+ static INLINE GenRegister retype(GenRegister reg, uint32_t type) {
+ reg.type = type;
+ return reg;
+ }
+
+ static INLINE GenRegister ud16(uint32_t file, ir::Register reg) {
+ return retype(vec16(file, reg), GEN_TYPE_UD);
+ }
+
+ static INLINE GenRegister ud8(uint32_t file, ir::Register reg) {
+ return retype(vec8(file, reg), GEN_TYPE_UD);
+ }
+
+ static INLINE GenRegister ud1(uint32_t file, ir::Register reg) {
+ return retype(vec1(file, reg), GEN_TYPE_UD);
+ }
+
+ static INLINE GenRegister d8(uint32_t file, ir::Register reg) {
+ return retype(vec8(file, reg), GEN_TYPE_D);
+ }
+
+ static INLINE GenRegister uw16(uint32_t file, ir::Register reg) {
+ return retype(vec16(file, reg), GEN_TYPE_UW);
+ }
+
+ static INLINE GenRegister uw8(uint32_t file, ir::Register reg) {
+ return retype(vec8(file, reg), GEN_TYPE_UW);
+ }
+
+ static INLINE GenRegister uw1(uint32_t file, ir::Register reg) {
+ return retype(vec1(file, reg), GEN_TYPE_UW);
+ }
+
+ static INLINE GenRegister ub16(uint32_t file, ir::Register reg) {
+ return GenRegister(file,
+ reg,
+ GEN_TYPE_UB,
+ GEN_VERTICAL_STRIDE_16,
+ GEN_WIDTH_8,
+ GEN_HORIZONTAL_STRIDE_2);
+ }
+
+ static INLINE GenRegister ub8(uint32_t file, ir::Register reg) {
+ return GenRegister(file,
+ reg,
+ GEN_TYPE_UB,
+ GEN_VERTICAL_STRIDE_16,
+ GEN_WIDTH_8,
+ GEN_HORIZONTAL_STRIDE_2);
+ }
+
+ static INLINE GenRegister ub1(uint32_t file, ir::Register reg) {
+ return retype(vec1(file, reg), GEN_TYPE_UB);
+ }
+
+ static INLINE GenRegister unpacked_uw(ir::Register reg) {
+ return GenRegister(GEN_GENERAL_REGISTER_FILE,
+ reg,
+ GEN_TYPE_UW,
+ GEN_VERTICAL_STRIDE_16,
+ GEN_WIDTH_8,
+ GEN_HORIZONTAL_STRIDE_2);
+ }
+
+ static INLINE GenRegister unpacked_ub(ir::Register reg) {
+ return GenRegister(GEN_GENERAL_REGISTER_FILE,
+ reg,
+ GEN_TYPE_UB,
+ GEN_VERTICAL_STRIDE_32,
+ GEN_WIDTH_8,
+ GEN_HORIZONTAL_STRIDE_4);
+ }
+
+ static INLINE GenRegister imm(uint32_t type) {
+ return GenRegister(GEN_IMMEDIATE_VALUE,
+ 0,
+ 0,
+ type,
+ GEN_VERTICAL_STRIDE_0,
+ GEN_WIDTH_1,
+ GEN_HORIZONTAL_STRIDE_0);
+ }
+
+ static INLINE GenRegister immf(float f) {
+ GenRegister immediate = imm(GEN_TYPE_F);
+ immediate.value.f = f;
+ return immediate;
+ }
+
+ static INLINE GenRegister immd(int d) {
+ GenRegister immediate = imm(GEN_TYPE_D);
+ immediate.value.d = d;
+ return immediate;
+ }
+
+ static INLINE GenRegister immud(uint32_t ud) {
+ GenRegister immediate = imm(GEN_TYPE_UD);
+ immediate.value.ud = ud;
+ return immediate;
+ }
+
+ static INLINE GenRegister immuw(uint16_t uw) {
+ GenRegister immediate = imm(GEN_TYPE_UW);
+ immediate.value.ud = uw | (uw << 16);
+ return immediate;
+ }
+
+ static INLINE GenRegister immw(int16_t w) {
+ GenRegister immediate = imm(GEN_TYPE_W);
+ immediate.value.d = w | (w << 16);
+ return immediate;
+ }
+
+ static INLINE GenRegister immv(uint32_t v) {
+ GenRegister immediate = imm(GEN_TYPE_V);
+ immediate.vstride = GEN_VERTICAL_STRIDE_0;
+ immediate.width = GEN_WIDTH_8;
+ immediate.hstride = GEN_HORIZONTAL_STRIDE_1;
+ immediate.value.ud = v;
+ return immediate;
+ }
+
+ static INLINE GenRegister immvf(uint32_t v) {
+ GenRegister immediate = imm(GEN_TYPE_VF);
+ immediate.vstride = GEN_VERTICAL_STRIDE_0;
+ immediate.width = GEN_WIDTH_4;
+ immediate.hstride = GEN_HORIZONTAL_STRIDE_1;
+ immediate.value.ud = v;
+ return immediate;
+ }
+
+ static INLINE GenRegister immvf4(uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3) {
+ GenRegister immediate = imm(GEN_TYPE_VF);
+ immediate.vstride = GEN_VERTICAL_STRIDE_0;
+ immediate.width = GEN_WIDTH_4;
+ immediate.hstride = GEN_HORIZONTAL_STRIDE_1;
+ immediate.value.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24));
+ return immediate;
+ }
+
+ static INLINE GenRegister f1grf(ir::Register reg) {
+ return vec1(GEN_GENERAL_REGISTER_FILE, reg);
+ }
+
+ static INLINE GenRegister f2grf(ir::Register reg) {
+ return vec2(GEN_GENERAL_REGISTER_FILE, reg);
+ }
+
+ static INLINE GenRegister f4grf(ir::Register reg) {
+ return vec4(GEN_GENERAL_REGISTER_FILE, reg);
+ }
+
+ static INLINE GenRegister f8grf(ir::Register reg) {
+ return vec8(GEN_GENERAL_REGISTER_FILE, reg);
+ }
+
+ static INLINE GenRegister f16grf(ir::Register reg) {
+ return vec16(GEN_GENERAL_REGISTER_FILE, reg);
+ }
+
+ static INLINE GenRegister ud16grf(ir::Register reg) {
+ return ud16(GEN_GENERAL_REGISTER_FILE, reg);
+ }
+
+ static INLINE GenRegister ud8grf(ir::Register reg) {
+ return ud8(GEN_GENERAL_REGISTER_FILE, reg);
+ }
+
+ static INLINE GenRegister ud1grf(ir::Register reg) {
+ return ud1(GEN_GENERAL_REGISTER_FILE, reg);
+ }
+
+ static INLINE GenRegister uw1grf(ir::Register reg) {
+ return uw1(GEN_GENERAL_REGISTER_FILE, reg);
+ }
+
+ static INLINE GenRegister uw8grf(ir::Register reg) {
+ return uw8(GEN_GENERAL_REGISTER_FILE, reg);
+ }
+
+ static INLINE GenRegister uw16grf(ir::Register reg) {
+ return uw16(GEN_GENERAL_REGISTER_FILE, reg);
+ }
+
+ static INLINE GenRegister ub1grf(ir::Register reg) {
+ return ub1(GEN_GENERAL_REGISTER_FILE, reg);
+ }
+
+ static INLINE GenRegister ub8grf(ir::Register reg) {
+ return ub8(GEN_GENERAL_REGISTER_FILE, reg);
+ }
+
+ static INLINE GenRegister ub16grf(ir::Register reg) {
+ return ub16(GEN_GENERAL_REGISTER_FILE, reg);
+ }
+
+ static INLINE GenRegister null(void) {
+ return GenRegister(GEN_ARCHITECTURE_REGISTER_FILE,
+ GEN_ARF_NULL,
+ 0,
+ GEN_TYPE_F,
+ GEN_VERTICAL_STRIDE_8,
+ GEN_WIDTH_8,
+ GEN_HORIZONTAL_STRIDE_1);
+ }
+
+ static INLINE GenRegister acc(void) {
+ return GenRegister(GEN_ARCHITECTURE_REGISTER_FILE,
+ GEN_ARF_ACCUMULATOR,
+ 0,
+ GEN_TYPE_F,
+ GEN_VERTICAL_STRIDE_8,
+ GEN_WIDTH_8,
+ GEN_HORIZONTAL_STRIDE_1);
+ }
+
+ static INLINE GenRegister ip(void) {
+ return GenRegister(GEN_ARCHITECTURE_REGISTER_FILE,
+ GEN_ARF_IP,
+ 0,
+ GEN_TYPE_D,
+ GEN_VERTICAL_STRIDE_4,
+ GEN_WIDTH_1,
+ GEN_HORIZONTAL_STRIDE_0);
+ }
+
+ static INLINE GenRegister notification1(void) {
+ return GenRegister(GEN_ARCHITECTURE_REGISTER_FILE,
+ GEN_ARF_NOTIFICATION_COUNT,
+ 0,
+ GEN_TYPE_UD,
+ GEN_VERTICAL_STRIDE_0,
+ GEN_WIDTH_1,
+ GEN_HORIZONTAL_STRIDE_0);
+ }
+
+ static INLINE GenRegister flag(uint32_t nr, uint32_t subnr) {
+ return GenRegister(GEN_ARCHITECTURE_REGISTER_FILE,
+ GEN_ARF_FLAG | nr,
+ subnr,
+ GEN_TYPE_UW,
+ GEN_VERTICAL_STRIDE_0,
+ GEN_WIDTH_1,
+ GEN_HORIZONTAL_STRIDE_0);
+ }
+
+ static INLINE GenRegister next(GenRegister reg) {
+ if (reg.physical)
+ reg.nr++;
+ else
+ reg.quarter++;
+ return reg;
+ }
+
+ /*! Build an indirectly addressed source */
+ static INLINE GenRegister indirect(uint32_t type, uint32_t subnr, uint32_t width) {
+ GenRegister reg;
+ reg.type = type;
+ reg.file = GEN_GENERAL_REGISTER_FILE;
+ reg.address_mode = GEN_ADDRESS_REGISTER_INDIRECT_REGISTER;
+ reg.width = width;
+ reg.subnr = subnr;
+ reg.nr = 0;
+ reg.negation = 0;
+ reg.absolute = 0;
+ reg.vstride = 0;
+ reg.hstride = 0;
+ return reg;
+ }
+
+ static INLINE GenRegister vec16(uint32_t file, uint32_t nr, uint32_t subnr) {
+ return GenRegister(file,
+ nr,
+ subnr,
+ GEN_TYPE_F,
+ GEN_VERTICAL_STRIDE_8,
+ GEN_WIDTH_8,
+ GEN_HORIZONTAL_STRIDE_1);
+ }
+
+ static INLINE GenRegister vec8(uint32_t file, uint32_t nr, uint32_t subnr) {
+ return GenRegister(file,
+ nr,
+ subnr,
+ GEN_TYPE_F,
+ GEN_VERTICAL_STRIDE_8,
+ GEN_WIDTH_8,
+ GEN_HORIZONTAL_STRIDE_1);
+ }
+
+ static INLINE GenRegister vec4(uint32_t file, uint32_t nr, uint32_t subnr) {
+ return GenRegister(file,
+ nr,
+ subnr,
+ GEN_TYPE_F,
+ GEN_VERTICAL_STRIDE_4,
+ GEN_WIDTH_4,
+ GEN_HORIZONTAL_STRIDE_1);
+ }
+
+ static INLINE GenRegister vec2(uint32_t file, uint32_t nr, uint32_t subnr) {
+ return GenRegister(file,
+ nr,
+ subnr,
+ GEN_TYPE_F,
+ GEN_VERTICAL_STRIDE_2,
+ GEN_WIDTH_2,
+ GEN_HORIZONTAL_STRIDE_1);
+ }
+
+ static INLINE GenRegister vec1(uint32_t file, uint32_t nr, uint32_t subnr) {
+ return GenRegister(file,
+ nr,
+ subnr,
+ GEN_TYPE_F,
+ GEN_VERTICAL_STRIDE_0,
+ GEN_WIDTH_1,
+ GEN_HORIZONTAL_STRIDE_0);
+ }
+
+ static INLINE GenRegister suboffset(GenRegister reg, uint32_t delta) {
+ reg.subnr += delta * typeSize(reg.type);
+ return reg;
+ }
+
+ static INLINE GenRegister ud16(uint32_t file, uint32_t nr, uint32_t subnr) {
+ return retype(vec16(file, nr, subnr), GEN_TYPE_UD);
+ }
+
+ static INLINE GenRegister ud8(uint32_t file, uint32_t nr, uint32_t subnr) {
+ return retype(vec8(file, nr, subnr), GEN_TYPE_UD);
+ }
+
+ static INLINE GenRegister ud1(uint32_t file, uint32_t nr, uint32_t subnr) {
+ return retype(vec1(file, nr, subnr), GEN_TYPE_UD);
+ }
+
+ static INLINE GenRegister d8(uint32_t file, uint32_t nr, uint32_t subnr) {
+ return retype(vec8(file, nr, subnr), GEN_TYPE_D);
+ }
+
+ static INLINE GenRegister uw16(uint32_t file, uint32_t nr, uint32_t subnr) {
+ return suboffset(retype(vec16(file, nr, 0), GEN_TYPE_UW), subnr);
+ }
+
+ static INLINE GenRegister uw8(uint32_t file, uint32_t nr, uint32_t subnr) {
+ return suboffset(retype(vec8(file, nr, 0), GEN_TYPE_UW), subnr);
+ }
+
+ static INLINE GenRegister uw1(uint32_t file, uint32_t nr, uint32_t subnr) {
+ return suboffset(retype(vec1(file, nr, 0), GEN_TYPE_UW), subnr);
+ }
+
+ static INLINE GenRegister ub16(uint32_t file, uint32_t nr, uint32_t subnr) {
+ return GenRegister(file,
+ nr,
+ subnr,
+ GEN_TYPE_UB,
+ GEN_VERTICAL_STRIDE_16,
+ GEN_WIDTH_8,
+ GEN_HORIZONTAL_STRIDE_2);
+ }
+
+ static INLINE GenRegister ub8(uint32_t file, uint32_t nr, uint32_t subnr) {
+ return GenRegister(file,
+ nr,
+ subnr,
+ GEN_TYPE_UB,
+ GEN_VERTICAL_STRIDE_16,
+ GEN_WIDTH_8,
+ GEN_HORIZONTAL_STRIDE_2);
+ }
+
+ static INLINE GenRegister ub1(uint32_t file, uint32_t nr, uint32_t subnr) {
+ return suboffset(retype(vec1(file, nr, 0), GEN_TYPE_UB), subnr);
+ }
+
+ static INLINE GenRegister f1grf(uint32_t nr, uint32_t subnr) {
+ return vec1(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+ }
+
+ static INLINE GenRegister f2grf(uint32_t nr, uint32_t subnr) {
+ return vec2(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+ }
+
+ static INLINE GenRegister f4grf(uint32_t nr, uint32_t subnr) {
+ return vec4(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+ }
+
+ static INLINE GenRegister f8grf(uint32_t nr, uint32_t subnr) {
+ return vec8(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+ }
+
+ static INLINE GenRegister f16grf(uint32_t nr, uint32_t subnr) {
+ return vec16(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+ }
+
+ static INLINE GenRegister ud16grf(uint32_t nr, uint32_t subnr) {
+ return ud16(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+ }
+
+ static INLINE GenRegister ud8grf(uint32_t nr, uint32_t subnr) {
+ return ud8(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+ }
+
+ static INLINE GenRegister ud1grf(uint32_t nr, uint32_t subnr) {
+ return ud1(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+ }
+
+ static INLINE GenRegister uw1grf(uint32_t nr, uint32_t subnr) {
+ return uw1(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+ }
+
+ static INLINE GenRegister uw8grf(uint32_t nr, uint32_t subnr) {
+ return uw8(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+ }
+
+ static INLINE GenRegister uw16grf(uint32_t nr, uint32_t subnr) {
+ return uw16(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+ }
+
+ static INLINE GenRegister ub1grf(uint32_t nr, uint32_t subnr) {
+ return ub1(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+ }
+
+ static INLINE GenRegister ub8grf(uint32_t nr, uint32_t subnr) {
+ return ub8(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+ }
+
+ static INLINE GenRegister ub16grf(uint32_t nr, uint32_t subnr) {
+ return ub16(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+ }
+
+ static INLINE GenRegister mask(uint32_t subnr) {
+ return uw1(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_MASK, subnr);
+ }
+
+ static INLINE GenRegister addr1(uint32_t subnr) {
+ return uw1(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_ADDRESS, subnr);
+ }
+
+ static INLINE GenRegister addr8(uint32_t subnr) {
+ return uw8(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_ADDRESS, subnr);
+ }
+
+ static INLINE GenRegister negate(GenRegister reg) {
+ if (reg.file != GEN_IMMEDIATE_VALUE)
+ reg.negation ^= 1;
+ else {
+ if (reg.type == GEN_TYPE_F)
+ reg.value.f = -reg.value.f;
+ else if (reg.type == GEN_TYPE_UD)
+ reg.value.ud = -reg.value.ud;
+ else if (reg.type == GEN_TYPE_D)
+ reg.value.d = -reg.value.d;
+ else if (reg.type == GEN_TYPE_UW) {
+ const uint16_t uw = reg.value.ud & 0xffff;
+ reg = GenRegister::immuw(-uw);
+ } else if (reg.type == GEN_TYPE_W) {
+ const uint16_t uw = reg.value.ud & 0xffff;
+ reg = GenRegister::immw(-(int16_t)uw);
+ } else
+ NOT_SUPPORTED;
+ }
+ return reg;
+ }
+
+ static INLINE GenRegister abs(GenRegister reg) {
+ reg.absolute = 1;
+ reg.negation = 0;
+ return reg;
+ }
+
+ /*! Generate register encoding with run-time simdWidth */
+#define DECL_REG_ENCODER(NAME, SIMD16, SIMD8, SIMD1) \
+ template <typename... Args> \
+ static INLINE GenRegister NAME(uint32_t simdWidth, Args... values) { \
+ if (simdWidth == 16) \
+ return SIMD16(values...); \
+ else if (simdWidth == 8) \
+ return SIMD8(values...); \
+ else if (simdWidth == 1) \
+ return SIMD1(values...); \
+ else { \
+ NOT_IMPLEMENTED; \
+ return SIMD1(values...); \
+ } \
+ }
+ DECL_REG_ENCODER(fxgrf, f16grf, f8grf, f1grf);
+ DECL_REG_ENCODER(uwxgrf, uw16grf, uw8grf, uw1grf);
+ DECL_REG_ENCODER(udxgrf, ud16grf, ud8grf, ud1grf);
+#undef DECL_REG_ENCODER
+ };
+} /* namespace gbe */
+
+#endif /* __GEN_REGISTER_HPP__ */
+
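
The DECL_REG_ENCODER macro above generates variadic templates that pick a
fixed-width encoder at run time from the simdWidth. A standalone sketch of that
pattern, with stand-in Reg/enc* names rather than the real GenRegister helpers:

    #include <cassert>
    #include <cstdint>

    struct Reg { uint32_t width; uint32_t nr; uint32_t subnr; };

    static Reg enc16(uint32_t nr, uint32_t subnr) { return {16, nr, subnr}; }
    static Reg enc8 (uint32_t nr, uint32_t subnr) { return {8,  nr, subnr}; }
    static Reg enc1 (uint32_t nr, uint32_t subnr) { return {1,  nr, subnr}; }

    // Forward the arguments to the encoder matching the run-time SIMD width.
    template <typename... Args>
    Reg encodeWithWidth(uint32_t simdWidth, Args... values) {
      if (simdWidth == 16) return enc16(values...);
      if (simdWidth == 8)  return enc8(values...);
      if (simdWidth == 1)  return enc1(values...);
      assert(0 && "unsupported SIMD width");
      return enc1(values...);
    }

    // Usage: Reg r = encodeWithWidth(simdWidth, 2u, 0u);
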
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
new file mode 100644
index 0000000..a14e139
--- /dev/null
+++ b/backend/src/backend/program.cpp
@@ -0,0 +1,277 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file program.cpp - callback interface for the compiler
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "program.h"
+#include "program.hpp"
+#include "gen_program.h"
+#include "sys/platform.hpp"
+#include "sys/cvar.hpp"
+#include "ir/liveness.hpp"
+#include "ir/value.hpp"
+#include "ir/unit.hpp"
+#include "llvm/llvm_to_gen.hpp"
+#include "llvm/Config/config.h"
+#include <cstring>
+#include <algorithm>
+#include <fstream>
+
+/* Not defined for LLVM 3.0 */
+#if !defined(LLVM_VERSION_MAJOR)
+#define LLVM_VERSION_MAJOR 3
+#endif /* !defined(LLVM_VERSION_MAJOR) */
+
+/* Not defined for LLVM 3.0 */
+#if !defined(LLVM_VERSION_MINOR)
+#define LLVM_VERSION_MINOR 0
+#endif /* !defined(LLVM_VERSION_MINOR) */
+
+namespace gbe {
+
+ Kernel::Kernel(const std::string &name) :
+ name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false)
+ {}
+ Kernel::~Kernel(void) {
+ GBE_SAFE_DELETE_ARRAY(args);
+ }
+ int32_t Kernel::getCurbeOffset(gbe_curbe_type type, uint32_t subType) const {
+ const PatchInfo patch(type, subType);
+ const auto it = std::lower_bound(patches.begin(), patches.end(), patch);
+ if (it == patches.end()) return -1; // nothing found
+ if (patch < *it) return -1; // they are not equal
+ return it->offset; // we found it!
+ }
+
+ Program::Program(void) {}
+ Program::~Program(void) {
+ for (auto &kernel : kernels) GBE_DELETE(kernel.second);
+ }
+
+ BVAR(OCL_OUTPUT_GEN_IR, false);
+
+ bool Program::buildFromLLVMFile(const char *fileName, std::string &error) {
+ ir::Unit unit;
+ if (llvmToGen(unit, fileName) == false) {
+ error = std::string(fileName) + " not found";
+ return false;
+ }
+ this->buildFromUnit(unit, error);
+ return true;
+ }
+
+ bool Program::buildFromUnit(const ir::Unit &unit, std::string &error) {
+ const auto &set = unit.getFunctionSet();
+ const uint32_t kernelNum = set.size();
+ if (OCL_OUTPUT_GEN_IR) std::cout << unit;
+ if (kernelNum == 0) return true;
+ for (const auto &pair : set) {
+ const std::string &name = pair.first;
+ Kernel *kernel = this->compileKernel(unit, name);
+ kernels.insert(std::make_pair(name, kernel));
+ }
+ return true;
+ }
+
+ static void programDelete(gbe_program gbeProgram) {
+ gbe::Program *program = (gbe::Program*)(gbeProgram);
+ GBE_SAFE_DELETE(program);
+ }
+
+ extern std::string ocl_stdlib_str;
+ static gbe_program programNewFromSource(const char *source,
+ size_t stringSize,
+ char *err,
+ size_t *errSize)
+ {
+ char clStr[L_tmpnam+1], llStr[L_tmpnam+1];
+ const std::string clName = std::string(tmpnam_r(clStr)) + ".cl"; /* unsafe! */
+ const std::string llName = std::string(tmpnam_r(llStr)) + ".ll"; /* unsafe! */
+
+ // Write the source to the cl file
+ FILE *clFile = fopen(clName.c_str(), "w");
+ FATAL_IF(clFile == NULL, "Failed to open temporary file");
+ fwrite(ocl_stdlib_str.c_str(), strlen(ocl_stdlib_str.c_str()), 1, clFile);
+ fwrite(source, strlen(source), 1, clFile);
+ fclose(clFile);
+
+ // Now compile the code to llvm using clang
+#if LLVM_VERSION_MINOR <= 1
+ std::string compileCmd = LLVM_PREFIX "/bin/clang -x cl -fno-color-diagnostics -emit-llvm -O3 -ccc-host-triple ptx32 -c ";
+#else
+ std::string compileCmd = LLVM_PREFIX "/bin/clang -ffp-contract=off -target nvptx -x cl -fno-color-diagnostics -emit-llvm -O3 -c ";
+#endif /* LLVM_VERSION_MINOR <= 1 */
+ compileCmd += clName;
+ compileCmd += " -o ";
+ compileCmd += llName;
+
+ // Open a pipe and compile from here. Using the Clang API directly would be better
+ FILE *pipe = popen(compileCmd.c_str(), "r");
+ FATAL_IF (pipe == NULL, "Unable to run extern compilation command");
+ char msg[256];
+ while (fgets(msg, sizeof(msg), pipe))
+ std::cout << msg;
+ pclose(pipe);
+
+ // Now build the program from llvm
+ return gbe_program_new_from_llvm(llName.c_str(), stringSize, err, errSize);
+ }
+
+ static uint32_t programGetKernelNum(gbe_program gbeProgram) {
+ if (gbeProgram == NULL) return 0;
+ const gbe::Program *program = (const gbe::Program*) gbeProgram;
+ return program->getKernelNum();
+ }
+
+ static gbe_kernel programGetKernelByName(gbe_program gbeProgram, const char *name) {
+ if (gbeProgram == NULL) return NULL;
+ const gbe::Program *program = (gbe::Program*) gbeProgram;
+ return (gbe_kernel) program->getKernel(std::string(name));
+ }
+
+ static gbe_kernel programGetKernel(const gbe_program gbeProgram, uint32_t ID) {
+ if (gbeProgram == NULL) return NULL;
+ const gbe::Program *program = (gbe::Program*) gbeProgram;
+ return (gbe_kernel) program->getKernel(ID);
+ }
+
+ static const char *kernelGetName(gbe_kernel genKernel) {
+ if (genKernel == NULL) return NULL;
+ const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel;
+ return kernel->getName();
+ }
+
+ static const char *kernelGetCode(gbe_kernel genKernel) {
+ if (genKernel == NULL) return NULL;
+ const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel;
+ return kernel->getCode();
+ }
+
+ static size_t kernelGetCodeSize(gbe_kernel genKernel) {
+ if (genKernel == NULL) return 0u;
+ const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel;
+ return kernel->getCodeSize();
+ }
+
+ static uint32_t kernelGetArgNum(gbe_kernel genKernel) {
+ if (genKernel == NULL) return 0u;
+ const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel;
+ return kernel->getArgNum();
+ }
+
+ static uint32_t kernelGetArgSize(gbe_kernel genKernel, uint32_t argID) {
+ if (genKernel == NULL) return 0u;
+ const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel;
+ return kernel->getArgSize(argID);
+ }
+
+ static gbe_arg_type kernelGetArgType(gbe_kernel genKernel, uint32_t argID) {
+ if (genKernel == NULL) return GBE_ARG_INVALID;
+ const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel;
+ return kernel->getArgType(argID);
+ }
+
+ static uint32_t kernelGetSIMDWidth(gbe_kernel genKernel) {
+ if (genKernel == NULL) return 0u;
+ const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel;
+ return kernel->getSIMDWidth();
+ }
+
+ static int32_t kernelGetCurbeOffset(gbe_kernel genKernel, gbe_curbe_type type, uint32_t subType) {
+ if (genKernel == NULL) return 0;
+ const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel;
+ return kernel->getCurbeOffset(type, subType);
+ }
+
+ static int32_t kernelGetCurbeSize(gbe_kernel genKernel) {
+ if (genKernel == NULL) return 0;
+ const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel;
+ return kernel->getCurbeSize();
+ }
+
+ static int32_t kernelGetStackSize(gbe_kernel genKernel) {
+ if (genKernel == NULL) return 0;
+ const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel;
+ return kernel->getStackSize();
+ }
+
+ static int32_t kernelUseSLM(gbe_kernel genKernel) {
+ if (genKernel == NULL) return 0;
+ const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel;
+ return kernel->getUseSLM() ? 1 : 0;
+ }
+
+ static uint32_t kernelGetRequiredWorkGroupSize(gbe_kernel kernel, uint32_t dim) {
+ return 0u;
+ }
+} /* namespace gbe */
+
+GBE_EXPORT_SYMBOL gbe_program_new_from_source_cb *gbe_program_new_from_source = NULL;
+GBE_EXPORT_SYMBOL gbe_program_new_from_binary_cb *gbe_program_new_from_binary = NULL;
+GBE_EXPORT_SYMBOL gbe_program_new_from_llvm_cb *gbe_program_new_from_llvm = NULL;
+GBE_EXPORT_SYMBOL gbe_program_delete_cb *gbe_program_delete = NULL;
+GBE_EXPORT_SYMBOL gbe_program_get_kernel_num_cb *gbe_program_get_kernel_num = NULL;
+GBE_EXPORT_SYMBOL gbe_program_get_kernel_by_name_cb *gbe_program_get_kernel_by_name = NULL;
+GBE_EXPORT_SYMBOL gbe_program_get_kernel_cb *gbe_program_get_kernel = NULL;
+GBE_EXPORT_SYMBOL gbe_kernel_get_name_cb *gbe_kernel_get_name = NULL;
+GBE_EXPORT_SYMBOL gbe_kernel_get_code_cb *gbe_kernel_get_code = NULL;
+GBE_EXPORT_SYMBOL gbe_kernel_get_code_size_cb *gbe_kernel_get_code_size = NULL;
+GBE_EXPORT_SYMBOL gbe_kernel_get_arg_num_cb *gbe_kernel_get_arg_num = NULL;
+GBE_EXPORT_SYMBOL gbe_kernel_get_arg_size_cb *gbe_kernel_get_arg_size = NULL;
+GBE_EXPORT_SYMBOL gbe_kernel_get_arg_type_cb *gbe_kernel_get_arg_type = NULL;
+GBE_EXPORT_SYMBOL gbe_kernel_get_simd_width_cb *gbe_kernel_get_simd_width = NULL;
+GBE_EXPORT_SYMBOL gbe_kernel_get_curbe_offset_cb *gbe_kernel_get_curbe_offset = NULL;
+GBE_EXPORT_SYMBOL gbe_kernel_get_curbe_size_cb *gbe_kernel_get_curbe_size = NULL;
+GBE_EXPORT_SYMBOL gbe_kernel_get_stack_size_cb *gbe_kernel_get_stack_size = NULL;
+GBE_EXPORT_SYMBOL gbe_kernel_get_required_work_group_size_cb *gbe_kernel_get_required_work_group_size = NULL;
+GBE_EXPORT_SYMBOL gbe_kernel_use_slm_cb *gbe_kernel_use_slm = NULL;
+
+namespace gbe
+{
+ /* Use pre-main to set up the callbacks */
+ struct CallBackInitializer
+ {
+ CallBackInitializer(void) {
+ gbe_program_new_from_source = gbe::programNewFromSource;
+ gbe_program_delete = gbe::programDelete;
+ gbe_program_get_kernel_num = gbe::programGetKernelNum;
+ gbe_program_get_kernel_by_name = gbe::programGetKernelByName;
+ gbe_program_get_kernel = gbe::programGetKernel;
+ gbe_kernel_get_name = gbe::kernelGetName;
+ gbe_kernel_get_code = gbe::kernelGetCode;
+ gbe_kernel_get_code_size = gbe::kernelGetCodeSize;
+ gbe_kernel_get_arg_num = gbe::kernelGetArgNum;
+ gbe_kernel_get_arg_size = gbe::kernelGetArgSize;
+ gbe_kernel_get_arg_type = gbe::kernelGetArgType;
+ gbe_kernel_get_simd_width = gbe::kernelGetSIMDWidth;
+ gbe_kernel_get_curbe_offset = gbe::kernelGetCurbeOffset;
+ gbe_kernel_get_curbe_size = gbe::kernelGetCurbeSize;
+ gbe_kernel_get_stack_size = gbe::kernelGetStackSize;
+ gbe_kernel_get_required_work_group_size = gbe::kernelGetRequiredWorkGroupSize;
+ gbe_kernel_use_slm = gbe::kernelUseSLM;
+ genSetupCallBacks();
+ }
+ };
+
+ static CallBackInitializer cbInitializer;
+} /* namespace gbe */
+
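
program.cpp fills in the gbe_* callback pointers before main() runs by constructing a namespace-scope static object (CallBackInitializer) whose constructor performs the assignments. A small self-contained sketch of that pre-main registration idiom follows; demo_delete and demoDeleteImpl are made-up names used only for illustration.

    #include <cstdio>

    // Hypothetical callback type and global pointer, mirroring gbe_program_delete_cb.
    typedef void (demo_delete_cb)(void *program);
    demo_delete_cb *demo_delete = nullptr;

    static void demoDeleteImpl(void *program) { std::printf("deleting %p\n", program); }

    // The constructor of a namespace-scope static runs before main(), so the
    // pointer is already wired up when the rest of the program starts.
    struct CallBackInitializer {
      CallBackInitializer() { demo_delete = demoDeleteImpl; }
    };
    static CallBackInitializer cbInitializer;

    int main() {
      int dummy = 0;
      demo_delete(&dummy); // already registered by the static initializer
      return 0;
    }
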
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
new file mode 100644
index 0000000..9045488
--- /dev/null
+++ b/backend/src/backend/program.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file program.h
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ *
+ * C interface for the Gen kernels and programs (either real Gen ISA or Gen
+ * simulator). This is the only thing the run-time can see from the compiler
+ */
+
+#ifndef __GBE_PROGRAM_H__
+#define __GBE_PROGRAM_H__
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*! Opaque structure that interfaces a GBE program */
+typedef struct _gbe_program *gbe_program;
+
+/*! Opaque structure that interfaces a GBE kernel (ie one OCL function) */
+typedef struct _gbe_kernel *gbe_kernel;
+
+/*! Argument type for each function call */
+enum gbe_arg_type {
+ GBE_ARG_VALUE = 0, // int, float and so on
+ GBE_ARG_GLOBAL_PTR = 1, // __global
+ GBE_ARG_CONSTANT_PTR = 2, // __constant
+ GBE_ARG_LOCAL_PTR = 3, // __local
+ GBE_ARG_IMAGE = 4, // image2d_t, image3d_t
+ GBE_ARG_INVALID = 5
+};
+
+/*! Constant buffer values (ie values to setup in the constant buffer) */
+enum gbe_curbe_type {
+ GBE_CURBE_LOCAL_ID_X = 0,
+ GBE_CURBE_LOCAL_ID_Y,
+ GBE_CURBE_LOCAL_ID_Z,
+ GBE_CURBE_LOCAL_SIZE_X,
+ GBE_CURBE_LOCAL_SIZE_Y,
+ GBE_CURBE_LOCAL_SIZE_Z,
+ GBE_CURBE_GLOBAL_SIZE_X,
+ GBE_CURBE_GLOBAL_SIZE_Y,
+ GBE_CURBE_GLOBAL_SIZE_Z,
+ GBE_CURBE_GLOBAL_OFFSET_X,
+ GBE_CURBE_GLOBAL_OFFSET_Y,
+ GBE_CURBE_GLOBAL_OFFSET_Z,
+ GBE_CURBE_GROUP_NUM_X,
+ GBE_CURBE_GROUP_NUM_Y,
+ GBE_CURBE_GROUP_NUM_Z,
+ GBE_CURBE_IMAGE_WIDTH,
+ GBE_CURBE_IMAGE_HEIGHT,
+ GBE_CURBE_IMAGE_DEPTH,
+ GBE_CURBE_STACK_POINTER,
+ GBE_CURBE_KERNEL_ARGUMENT,
+ GBE_CURBE_EXTRA_ARGUMENT,
+ GBE_CURBE_BLOCK_IP,
+ GBE_CURBE_THREAD_NUM
+};
+
+/*! Extra arguments use the negative range of sub-values */
+enum gbe_extra_argument {
+ GBE_STACK_BUFFER = 0 /* Give stack location in curbe */
+};
+
+/*! Create a new program from the given source code (zero terminated string) */
+typedef gbe_program (gbe_program_new_from_source_cb)(const char *source,
+ size_t stringSize,
+ char *err,
+ size_t *err_size);
+extern gbe_program_new_from_source_cb *gbe_program_new_from_source;
+
+/*! Create a new program from the given blob */
+typedef gbe_program (gbe_program_new_from_binary_cb)(const char *binary, size_t size);
+extern gbe_program_new_from_binary_cb *gbe_program_new_from_binary;
+
+/*! Create a new program from the given LLVM file */
+typedef gbe_program (gbe_program_new_from_llvm_cb)(const char *fileName,
+ size_t string_size,
+ char *err,
+ size_t *err_size);
+extern gbe_program_new_from_llvm_cb *gbe_program_new_from_llvm;
+
+/*! Destroy and deallocate the given program */
+typedef void (gbe_program_delete_cb)(gbe_program);
+extern gbe_program_delete_cb *gbe_program_delete;
+
+/*! Get the number of functions in the program */
+typedef uint32_t (gbe_program_get_kernel_num_cb)(gbe_program);
+extern gbe_program_get_kernel_num_cb *gbe_program_get_kernel_num;
+
+/*! Get the kernel from its name */
+typedef gbe_kernel (gbe_program_get_kernel_by_name_cb)(gbe_program, const char *name);
+extern gbe_program_get_kernel_by_name_cb *gbe_program_get_kernel_by_name;
+
+/*! Get the kernel from its ID */
+typedef gbe_kernel (gbe_program_get_kernel_cb)(gbe_program, uint32_t ID);
+extern gbe_program_get_kernel_cb *gbe_program_get_kernel;
+
+/*! Get the kernel name */
+typedef const char *(gbe_kernel_get_name_cb)(gbe_kernel);
+extern gbe_kernel_get_name_cb *gbe_kernel_get_name;
+
+/*! Get the kernel source code */
+typedef const char *(gbe_kernel_get_code_cb)(gbe_kernel);
+extern gbe_kernel_get_code_cb *gbe_kernel_get_code;
+
+/*! Get the size of the source code */
+typedef size_t (gbe_kernel_get_code_size_cb)(gbe_kernel);
+extern gbe_kernel_get_code_size_cb *gbe_kernel_get_code_size;
+
+/*! Get the total number of arguments */
+typedef uint32_t (gbe_kernel_get_arg_num_cb)(gbe_kernel);
+extern gbe_kernel_get_arg_num_cb *gbe_kernel_get_arg_num;
+
+/*! Get the size of the given argument */
+typedef uint32_t (gbe_kernel_get_arg_size_cb)(gbe_kernel, uint32_t argID);
+extern gbe_kernel_get_arg_size_cb *gbe_kernel_get_arg_size;
+
+/*! Get the type of the given argument */
+typedef enum gbe_arg_type (gbe_kernel_get_arg_type_cb)(gbe_kernel, uint32_t argID);
+extern gbe_kernel_get_arg_type_cb *gbe_kernel_get_arg_type;
+
+/*! Get the simd width for the kernel */
+typedef uint32_t (gbe_kernel_get_simd_width_cb)(gbe_kernel);
+extern gbe_kernel_get_simd_width_cb *gbe_kernel_get_simd_width;
+
+/*! Get the curbe size required by the kernel */
+typedef int32_t (gbe_kernel_get_curbe_size_cb)(gbe_kernel);
+extern gbe_kernel_get_curbe_size_cb *gbe_kernel_get_curbe_size;
+
+/*! Get the stack size (zero if no stack is required) */
+typedef int32_t (gbe_kernel_get_stack_size_cb)(gbe_kernel);
+extern gbe_kernel_get_stack_size_cb *gbe_kernel_get_stack_size;
+
+/*! Get the curbe offset where to put the data. Returns -1 if not required */
+typedef int32_t (gbe_kernel_get_curbe_offset_cb)(gbe_kernel, enum gbe_curbe_type type, uint32_t sub_type);
+extern gbe_kernel_get_curbe_offset_cb *gbe_kernel_get_curbe_offset;
+
+/*! Indicates if a work group size is required. Return the required width or 0
+ * if none
+ */
+typedef uint32_t (gbe_kernel_get_required_work_group_size_cb)(gbe_kernel, uint32_t dim);
+extern gbe_kernel_get_required_work_group_size_cb *gbe_kernel_get_required_work_group_size;
+
+/*! Says if SLM is used. Required to reconfigure the L3 complex */
+typedef int32_t (gbe_kernel_use_slm_cb)(gbe_kernel);
+extern gbe_kernel_use_slm_cb *gbe_kernel_use_slm;
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __GBE_PROGRAM_H__ */
+
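
As a rough usage sketch (not part of the upstream sources), a run-time linked against the compiler library could drive this C interface as below; the kernel source and kernel name are invented, and the include path assumes the backend directory is on the compiler's include path.

    #include <cstdio>
    #include <cstring>
    #include "backend/program.h"

    int main() {
      // Hypothetical kernel used only for illustration.
      const char *source = "__kernel void add(__global int *dst) { dst[0] = 1; }";
      char err[1024] = {0};
      size_t errSize = 0;
      gbe_program program = gbe_program_new_from_source(source, std::strlen(source), err, &errSize);
      if (program == NULL) { std::fprintf(stderr, "%s\n", err); return 1; }

      gbe_kernel kernel = gbe_program_get_kernel_by_name(program, "add");
      if (kernel != NULL)
        std::printf("args=%u simd=%u curbe=%d\n",
                    (unsigned) gbe_kernel_get_arg_num(kernel),
                    (unsigned) gbe_kernel_get_simd_width(kernel),
                    (int) gbe_kernel_get_curbe_size(kernel));
      gbe_program_delete(program);
      return 0;
    }
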
diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp
new file mode 100644
index 0000000..646469b
--- /dev/null
+++ b/backend/src/backend/program.hpp
@@ -0,0 +1,158 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file program.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __GBE_PROGRAM_HPP__
+#define __GBE_PROGRAM_HPP__
+
+#include "backend/program.h"
+#include "sys/hash_map.hpp"
+#include "sys/vector.hpp"
+#include <string>
+
+namespace gbe {
+namespace ir {
+ class Unit; // Compilation unit. Contains the program to compile
+} /* namespace ir */
+} /* namespace gbe */
+
+namespace gbe {
+
+ /*! Info for the kernel argument */
+ struct KernelArgument {
+ gbe_arg_type type; //!< Pointer, structure, image, regular value?
+ uint32_t size; //!< Size of the argument
+ };
+
+ /*! Stores the offset at which to patch */
+ struct PatchInfo {
+ INLINE PatchInfo(gbe_curbe_type type, uint32_t subType = 0u, uint32_t offset = 0u) :
+ type(uint32_t(type)), subType(subType), offset(offset) {}
+ INLINE PatchInfo(void) {}
+ uint32_t type : 8; //!< Type of the patch (see program.h for the list)
+ uint32_t subType : 8; //!< Optional sub-type of the patch (see program.h)
+ uint32_t offset : 16; //!< Optional offset to encode
+ };
+
+ /*! PatchInfo entries are sorted so we can binary search them */
+ INLINE bool operator< (PatchInfo i0, PatchInfo i1) {
+ if (i0.type != i1.type) return i0.type < i1.type;
+ return i0.subType < i1.subType;
+ }
+
+ /*! Describe a compiled kernel */
+ class Kernel : public NonCopyable
+ {
+ public:
+ /*! Create an empty kernel with the given name */
+ Kernel(const std::string &name);
+ /*! Destroy it */
+ virtual ~Kernel(void);
+ /*! Return the instruction stream (to be implemented) */
+ virtual const char *getCode(void) const = 0;
+ /*! Return the instruction stream size (to be implemented) */
+ virtual size_t getCodeSize(void) const = 0;
+ /*! Get the kernel name */
+ INLINE const char *getName(void) const { return name.c_str(); }
+ /*! Return the number of arguments for the kernel call */
+ INLINE uint32_t getArgNum(void) const { return argNum; }
+ /*! Return the size of the given argument */
+ INLINE uint32_t getArgSize(uint32_t argID) const {
+ return argID >= argNum ? 0u : args[argID].size;
+ }
+ /*! Return the type of the given argument */
+ INLINE gbe_arg_type getArgType(uint32_t argID) const {
+ return argID >= argNum ? GBE_ARG_INVALID : args[argID].type;
+ }
+ /*! Get the offset where to patch. Returns -1 if no patch needed */
+ int32_t getCurbeOffset(gbe_curbe_type type, uint32_t subType) const;
+ /*! Get the curbe size required by the kernel */
+ INLINE uint32_t getCurbeSize(void) const { return this->curbeSize; }
+ /*! Return the size of the stack (zero if none) */
+ INLINE uint32_t getStackSize(void) const { return this->stackSize; }
+ /*! Get the SIMD width for the kernel */
+ INLINE uint32_t getSIMDWidth(void) const { return this->simdWidth; }
+ /*! Says if SLM is needed for it */
+ INLINE bool getUseSLM(void) const { return this->useSLM; }
+ protected:
+ friend class Context; //!< Owns the kernels
+ const std::string name; //!< Kernel name
+ KernelArgument *args; //!< Each argument
+ vector<PatchInfo> patches; //!< Indicates how to build the curbe
+ uint32_t argNum; //!< Number of function arguments
+ uint32_t curbeSize; //!< Size of the data to push
+ uint32_t simdWidth; //!< SIMD size for the kernel (lane number)
+ uint32_t stackSize; //!< Stack size (may be 0 if unused)
+ bool useSLM; //!< SLM requires a special HW config
+ GBE_CLASS(Kernel); //!< Use custom allocators
+ };
+
+ /*! Describe a compiled program */
+ class Program : public NonCopyable
+ {
+ public:
+ /*! Create an empty program */
+ Program(void);
+ /*! Destroy the program */
+ virtual ~Program(void);
+ /*! Get the number of kernels in the program */
+ uint32_t getKernelNum(void) const { return kernels.size(); }
+ /*! Get the kernel from its name */
+ Kernel *getKernel(const std::string &name) const {
+ auto it = kernels.find(name);
+ if (it == kernels.end())
+ return NULL;
+ else
+ return it->second;
+ }
+ /*! Get the kernel from its ID */
+ Kernel *getKernel(uint32_t ID) const {
+ uint32_t currID = 0;
+ Kernel *kernel = NULL;
+ for (const auto &pair : kernels) {
+ if (currID == ID) {
+ kernel = pair.second;
+ break;
+ }
+ currID++;
+ }
+ return kernel;
+ }
+ /*! Build a program from an ir::Unit */
+ bool buildFromUnit(const ir::Unit &unit, std::string &error);
+ /*! Build a program from an LLVM source file */
+ bool buildFromLLVMFile(const char *fileName, std::string &error);
+ /*! Build a program from an OCL source string */
+ bool buildFromSource(const char *source, std::string &error);
+ protected:
+ /*! Compile a kernel */
+ virtual Kernel *compileKernel(const ir::Unit &unit, const std::string &name) = 0;
+ /*! Kernels sorted by their name */
+ hash_map<std::string, Kernel*> kernels;
+ /*! Use custom allocators */
+ GBE_CLASS(Program);
+ };
+
+} /* namespace gbe */
+
+#endif /* __GBE_PROGRAM_HPP__ */
+
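
Kernel::getCurbeOffset (see program.cpp above) depends on the patches vector being sorted with the operator< defined here, so std::lower_bound can locate an entry in O(log n). Below is a self-contained sketch of the same lookup over a plain std::vector, with a simplified Patch struct standing in for PatchInfo.

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Patch { uint32_t type, subType, offset; };
    static bool operator< (const Patch &a, const Patch &b) {
      if (a.type != b.type) return a.type < b.type;
      return a.subType < b.subType; // the offset takes no part in the ordering
    }

    // Mirrors Kernel::getCurbeOffset: binary search, -1 when the entry is missing.
    static int32_t curbeOffset(const std::vector<Patch> &patches, uint32_t type, uint32_t subType) {
      const Patch key = {type, subType, 0};
      const auto it = std::lower_bound(patches.begin(), patches.end(), key);
      if (it == patches.end() || key < *it) return -1;
      return int32_t(it->offset);
    }

    int main() {
      std::vector<Patch> patches = {{0, 0, 32}, {3, 0, 64}, {3, 1, 96}}; // already sorted
      std::printf("%d %d\n", curbeOffset(patches, 3, 1), curbeOffset(patches, 7, 0)); // 96 -1
      return 0;
    }
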
diff --git a/backend/src/ir/Makefile b/backend/src/ir/Makefile
new file mode 100644
index 0000000..c8f77f9
--- /dev/null
+++ b/backend/src/ir/Makefile
@@ -0,0 +1,4 @@
+TOP=../..
+SUBDIRS=.
+
+include $(TOP)/Makefile.shared
diff --git a/backend/src/ir/constant.cpp b/backend/src/ir/constant.cpp
new file mode 100644
index 0000000..c9f5bfe
--- /dev/null
+++ b/backend/src/ir/constant.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file constant.cpp
+ *
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#include "constant.hpp"
+
+namespace gbe {
+namespace ir {
+
+ void ConstantSet::append(const char *data,
+ const std::string &name,
+ uint32_t size,
+ uint32_t alignment)
+ {
+ const uint32_t offset = ALIGN(this->data.size(), alignment);
+ const uint32_t padding = offset - this->data.size();
+ const Constant constant(name, size, alignment, offset);
+ constants.push_back(constant);
+ for (uint32_t i = 0; i < padding; ++i) this->data.push_back(0);
+ for (uint32_t i = 0; i < size; ++i) this->data.push_back(data[i]);
+ }
+
+} /* namespace ir */
+} /* namespace gbe */
+
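
ConstantSet::append pads the serialized data so each constant starts at its required alignment, then copies the raw bytes. A short sketch of that offset/padding arithmetic follows; alignUp is assumed to behave like the ALIGN macro (round up to a multiple of the alignment), which is not shown in this patch.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Assumed behaviour of the ALIGN macro: round up to a multiple of alignment.
    static uint32_t alignUp(uint32_t x, uint32_t alignment) {
      return (x + alignment - 1) / alignment * alignment;
    }

    // Same bookkeeping as ConstantSet::append: pad, then copy the raw bytes.
    static uint32_t appendConstant(std::vector<char> &data, const char *bytes,
                                   uint32_t size, uint32_t alignment) {
      const uint32_t offset = alignUp(uint32_t(data.size()), alignment);
      const uint32_t padding = offset - uint32_t(data.size());
      for (uint32_t i = 0; i < padding; ++i) data.push_back(0);
      for (uint32_t i = 0; i < size; ++i) data.push_back(bytes[i]);
      return offset; // stored in the Constant descriptor
    }

    int main() {
      std::vector<char> data(3, 'x');         // the segment already holds 3 bytes
      const char value[4] = {1, 2, 3, 4};
      const uint32_t offset = appendConstant(data, value, 4, 4);
      std::printf("offset=%u size=%zu\n", offset, data.size()); // offset=4 size=8
      return 0;
    }
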
diff --git a/backend/src/ir/constant.hpp b/backend/src/ir/constant.hpp
new file mode 100644
index 0000000..3a23dc2
--- /dev/null
+++ b/backend/src/ir/constant.hpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file constant.hpp
+ *
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#ifndef __GBE_IR_CONSTANT_HPP__
+#define __GBE_IR_CONSTANT_HPP__
+
+#include "sys/vector.hpp"
+
+namespace gbe {
+namespace ir {
+
+ /*! Describe one constant (may be a scalar or an array) */
+ class Constant
+ {
+ public:
+ /*! Build a constant description */
+ INLINE Constant(const std::string &name, uint32_t size, uint32_t alignment, uint32_t offset) :
+ name(name), size(size), alignment(alignment), offset(offset) {}
+ /*! Copy constructor */
+ INLINE Constant(const Constant &other) :
+ name(other.name), size(other.size), alignment(other.alignment), offset(other.offset) {}
+ /*! Copy operator */
+ INLINE Constant& operator= (const Constant &other) {
+ this->name = other.name;
+ this->size = other.size;
+ this->alignment = other.alignment;
+ this->offset = other.offset;
+ return *this;
+ }
+ /*! Nothing happens here */
+ INLINE ~Constant(void) {}
+ private:
+ std::string name; //!< Optional name of the constant
+ uint32_t size; //!< Size of the constant
+ uint32_t alignment; //!< Alignment required for each constant
+ uint32_t offset; //!< Offset of the constant in the data segment
+ GBE_CLASS(Constant);
+ };
+
+ /*! A constant set is a set of immutable data associated to a compilation
+ * unit
+ */
+ class ConstantSet
+ {
+ public:
+ /*! Append a new constant in the constant set */
+ void append(const char*, const std::string&, uint32_t size, uint32_t alignment);
+ private:
+ vector<char> data; //!< The constant data serialized in one array
+ vector<Constant> constants;//!< Each constant description
+ GBE_CLASS(ConstantSet);
+ };
+
+} /* namespace ir */
+} /* namespace gbe */
+
+#endif /* __GBE_IR_CONSTANT_HPP__ */
+
diff --git a/backend/src/ir/context.cpp b/backend/src/ir/context.cpp
new file mode 100644
index 0000000..0135ae1
--- /dev/null
+++ b/backend/src/ir/context.cpp
@@ -0,0 +1,178 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file context.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#include "ir/context.hpp"
+#include "ir/unit.hpp"
+#include "ir/lowering.hpp"
+
+namespace gbe {
+namespace ir {
+
+ Context::Context(Unit &unit) :
+ unit(unit), fn(NULL), bb(NULL), usedLabels(NULL) {}
+
+ Context::~Context(void) {
+ for (const auto &elem : fnStack) GBE_SAFE_DELETE(elem.usedLabels);
+ GBE_SAFE_DELETE(usedLabels);
+ }
+
+ Function &Context::getFunction(void) {
+ GBE_ASSERTM(fn != NULL, "No function currently defined");
+ return *fn;
+ }
+
+ void Context::appendPushedConstant(Register reg, const PushLocation &pushed)
+ {
+ GBE_ASSERTM(fn != NULL, "No function currently defined");
+ GBE_ASSERTM(fn->pushMap.contains(reg) == false, "Register already pushed");
+ fn->pushMap.insert(std::make_pair(reg, pushed));
+ fn->locationMap.insert(std::make_pair(pushed, reg));
+ }
+
+ void Context::startFunction(const std::string &name) {
+ fnStack.push_back(StackElem(fn,bb,usedLabels));
+ fn = unit.newFunction(name);
+ usedLabels = GBE_NEW_NO_ARG(vector<uint8_t>);
+ bb = NULL;
+ }
+
+ void Context::endFunction(void) {
+ GBE_ASSERTM(fn != NULL, "No function to end");
+ GBE_ASSERT(fnStack.size() != 0);
+ GBE_ASSERT(usedLabels != NULL);
+
+ // Empty function -> append a return
+ if (fn->blockNum() == 0) this->RET();
+
+ // Check first that all branch instructions point to valid labels
+ GBE_ASSERT(usedLabels);
+#if GBE_DEBUG
+ for (auto usage : *usedLabels)
+ GBE_ASSERTM(usage != LABEL_IS_POINTED, "A label is used and not defined");
+#endif /* GBE_DEBUG */
+ GBE_DELETE(usedLabels);
+
+ // Remove all returns and insert one unique return block at the end of the
+ // function
+ lowerReturn(unit, fn->getName());
+
+ // Spill function argument to the stack if required and identify which
+ // function arguments can use constant push
+ lowerFunctionArguments(unit, fn->getName());
+
+ // Properly order labels and compute the CFG
+ fn->sortLabels();
+ fn->computeCFG();
+ const StackElem elem = fnStack.back();
+ fnStack.pop_back();
+ fn = elem.fn;
+ bb = elem.bb;
+ usedLabels = elem.usedLabels;
+ }
+
+ Register Context::reg(RegisterFamily family) {
+ GBE_ASSERTM(fn != NULL, "No function currently defined");
+ return fn->newRegister(family);
+ }
+
+ LabelIndex Context::label(void) {
+ GBE_ASSERTM(fn != NULL, "No function currently defined");
+ const LabelIndex index = fn->newLabel();
+ if (index >= usedLabels->size()) {
+ usedLabels->resize(index + 1);
+ (*usedLabels)[index] = 0;
+ }
+ return index;
+ }
+
+ void Context::input(FunctionArgument::Type type, Register reg, uint32_t elementSize) {
+ GBE_ASSERTM(fn != NULL, "No function currently defined");
+ GBE_ASSERTM(reg < fn->file.regNum(), "Out-of-bound register");
+ FunctionArgument *arg = GBE_NEW(FunctionArgument, type, reg, elementSize);
+ fn->args.push_back(arg);
+ }
+
+ void Context::output(Register reg) {
+ GBE_ASSERTM(fn != NULL, "No function currently defined");
+ GBE_ASSERTM(reg < fn->file.regNum(), "Out-of-bound register");
+ fn->outputs.push_back(reg);
+ }
+
+ void Context::startBlock(void) {
+ GBE_ASSERTM(fn != NULL, "No function currently defined");
+ this->bb = GBE_NEW(BasicBlock, *fn);
+ fn->blocks.push_back(bb);
+ }
+
+ void Context::endBlock(void) {
+ this->bb = NULL;
+ }
+
+ void Context::append(const Instruction &insn) {
+ GBE_ASSERTM(fn != NULL, "No function currently defined");
+
+ // Start a new block if this is a label
+ if (insn.isMemberOf<LabelInstruction>() == true) {
+ this->endBlock();
+ this->startBlock();
+ const LabelIndex index = cast<LabelInstruction>(insn).getLabelIndex();
+ GBE_ASSERTM(index < fn->labelNum(), "Out-of-bound label");
+ GBE_ASSERTM(fn->labels[index] == NULL, "Label used in a previous block");
+ fn->labels[index] = bb;
+
+ // Now the label index is properly defined
+ GBE_ASSERT(index < usedLabels->size());
+ (*usedLabels)[index] |= LABEL_IS_DEFINED;
+ }
+ // We create a new label for a new block if the user did not do it
+ else if (bb == NULL) {
+ // this->startBlock();
+ const LabelIndex index = this->label();
+ const Instruction insn = ir::LABEL(index);
+ this->append(insn);
+ }
+
+ // Append the instruction in the stream
+ Instruction *insnPtr = fn->newInstruction(insn);
+ bb->append(*insnPtr);
+#if GBE_DEBUG
+ std::string whyNot;
+ GBE_ASSERTM(insnPtr->wellFormed(whyNot), whyNot.c_str());
+#endif /* GBE_DEBUG */
+
+ // Close the current block if this is a branch
+ if (insn.isMemberOf<BranchInstruction>() == true) {
+ // We must book keep the fact that the label is used
+ if (insn.getOpcode() == OP_BRA) {
+ const BranchInstruction &branch = cast<BranchInstruction>(insn);
+ const LabelIndex index = branch.getLabelIndex();
+ GBE_ASSERT(index < usedLabels->size());
+ (*usedLabels)[index] |= LABEL_IS_POINTED;
+ }
+ this->endBlock();
+ }
+ }
+
+} /* namespace ir */
+} /* namespace gbe */
+
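
Context tracks each label of the current function with two bits: LABEL_IS_POINTED when a branch targets it and LABEL_IS_DEFINED when a LABEL instruction defines it, so endFunction can assert that no label is branched to without ever being defined. A minimal sketch of that bookkeeping, independent of the IR classes:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    static const uint8_t LABEL_IS_POINTED = 1 << 0; // a branch targets it
    static const uint8_t LABEL_IS_DEFINED = 1 << 1; // a label instruction defines it

    int main() {
      std::vector<uint8_t> usedLabels(3, 0);

      usedLabels[0] |= LABEL_IS_DEFINED;                    // defined only
      usedLabels[1] |= LABEL_IS_POINTED | LABEL_IS_DEFINED; // branched to and defined
      // usedLabels[2] stays 0: never used at all, which is fine

      // The check performed at endFunction time: usage must never be exactly
      // LABEL_IS_POINTED, i.e. pointed to but never defined.
      for (uint8_t usage : usedLabels)
        assert(usage != LABEL_IS_POINTED && "a label is used and not defined");
      return 0;
    }
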
diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp
new file mode 100644
index 0000000..fc5be0f
--- /dev/null
+++ b/backend/src/ir/context.hpp
@@ -0,0 +1,214 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file context.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#ifndef __GBE_IR_CONTEXT_HPP__
+#define __GBE_IR_CONTEXT_HPP__
+
+#include "ir/instruction.hpp"
+#include "ir/function.hpp"
+#include "ir/register.hpp"
+#include "ir/immediate.hpp"
+#include "ir/unit.hpp"
+#include "sys/vector.hpp"
+#include <tuple>
+
+namespace gbe {
+namespace ir {
+
+ /*! A context allows easy creation of functions (the instruction stream and
+ * the set of immediates and registers needed for them) and constant arrays
+ */
+ class Context
+ {
+ public:
+ /*! Create a new context for this unit */
+ Context(Unit &unit);
+ /*! Free resources needed by context */
+ virtual ~Context(void);
+ /*! Create a new function "name" */
+ void startFunction(const std::string &name);
+ /*! Close the function */
+ void endFunction(void);
+ /*! Get the current processed unit */
+ INLINE Unit &getUnit(void) { return unit; }
+ /*! Get the current processed function */
+ Function &getFunction(void);
+ /*! Set the SIMD width of the function */
+ void setSimdWidth(uint32_t width) const {
+ GBE_ASSERT(width == 8 || width == 16);
+ fn->simdWidth = width;
+ }
+ /*! Append a new pushed constant */
+ void appendPushedConstant(Register reg, const PushLocation &pushed);
+ /*! Create a new register with the given family for the current function */
+ Register reg(RegisterFamily family);
+ /*! Create a new immediate value */
+ template <typename T> INLINE ImmediateIndex newImmediate(T value) {
+ const Immediate imm(value);
+ return fn->newImmediate(imm);
+ }
+ /*! Create an integer immediate value */
+ INLINE ImmediateIndex newIntegerImmediate(int64_t x, Type type) {
+ switch (type) {
+ case TYPE_S8: return this->newImmediate(int8_t(x));
+ case TYPE_U8: return this->newImmediate(uint8_t(x));
+ case TYPE_S16: return this->newImmediate(int16_t(x));
+ case TYPE_U16: return this->newImmediate(uint16_t(x));
+ case TYPE_S32: return this->newImmediate(int32_t(x));
+ case TYPE_U32: return this->newImmediate(uint32_t(x));
+ case TYPE_S64: return this->newImmediate(int64_t(x));
+ case TYPE_U64: return this->newImmediate(uint64_t(x));
+ default: NOT_SUPPORTED; return ImmediateIndex(0);
+ }
+ return ImmediateIndex(0);
+ }
+
+ /*! Set an immediate value */
+ template <typename T> INLINE void setImmediate(ImmediateIndex index, T value) {
+ const Immediate imm(value);
+ fn->immediates[index] = imm;
+ }
+ /*! Create a new register holding the given value. A LOADI is pushed */
+ template <typename T> INLINE Register immReg(T value) {
+ GBE_ASSERTM(fn != NULL, "No function currently defined");
+ const Immediate imm(value);
+ const ImmediateIndex index = fn->newImmediate(imm);
+ const RegisterFamily family = getFamily(imm.type);
+ const Register reg = this->reg(family);
+ this->LOADI(imm.type, reg, index);
+ return reg;
+ }
+ /*! Create a new label for the current function */
+ LabelIndex label(void);
+ /*! Append a new input register for the function */
+ void input(FunctionArgument::Type type, Register reg, uint32_t elemSz = 0u);
+ /*! Append a new output register for the function */
+ void output(Register reg);
+ /*! Get the immediate value */
+ INLINE Immediate getImmediate(ImmediateIndex index) const {
+ return fn->getImmediate(index);
+ }
+ /*! Append a new tuple */
+ template <typename... Args> INLINE Tuple tuple(Args...args) {
+ GBE_ASSERTM(fn != NULL, "No function currently defined");
+ return fn->file.appendTuple(args...);
+ }
+ /*! Make a tuple from an array of register */
+ INLINE Tuple arrayTuple(const Register *reg, uint32_t regNum) {
+ GBE_ASSERTM(fn != NULL, "No function currently defined");
+ return fn->file.appendArrayTuple(reg, regNum);
+ }
+ /*! We just use variadic templates to forward instruction functions */
+#define DECL_INSN(NAME, FAMILY) \
+ template <typename... Args> INLINE void NAME(Args...args);
+#include "ir/instruction.hxx"
+#undef DECL_INSN
+ /*! Return the pointer size handled by the unit */
+ INLINE PointerSize getPointerSize(void) const {
+ return unit.getPointerSize();
+ }
+ /*! Return the family of registers that contain pointer */
+ INLINE RegisterFamily getPointerFamily(void) const {
+ return unit.getPointerFamily();
+ }
+#define DECL_THREE_SRC_INSN(NAME) \
+ INLINE void NAME(Type type, \
+ Register dst, \
+ Register src0, \
+ Register src1, \
+ Register src2) \
+ { \
+ const Tuple index = this->tuple(src0, src1, src2); \
+ this->NAME(type, dst, index); \
+ }
+ DECL_THREE_SRC_INSN(SEL);
+#undef DECL_THREE_SRC_INSN
+
+ /*! For all unary functions */
+ void ALU1(Opcode opcode, Type type, Register dst, Register src) {
+ const Instruction insn = gbe::ir::ALU1(opcode, type, dst, src);
+ this->append(insn);
+ }
+
+ /*! LOAD with the destinations directly specified */
+ template <typename... Args>
+ void LOAD(Type type, Register offset, AddressSpace space, bool dwAligned, Args...values)
+ {
+ const Tuple index = this->tuple(values...);
+ const uint16_t valueNum = std::tuple_size<std::tuple<Args...>>::value;
+ GBE_ASSERT(valueNum > 0);
+ this->LOAD(type, index, offset, space, valueNum, dwAligned);
+ }
+
+ /*! STORE with the sources directly specified */
+ template <typename... Args>
+ void STORE(Type type, Register offset, AddressSpace space, bool dwAligned, Args...values)
+ {
+ const Tuple index = this->tuple(values...);
+ const uint16_t valueNum = std::tuple_size<std::tuple<Args...>>::value;
+ GBE_ASSERT(valueNum > 0);
+ this->STORE(type, index, offset, space, valueNum, dwAligned);
+ }
+
+ protected:
+ /*! A block must be started with a label */
+ void startBlock(void);
+ /*! A block must be ended with a branch */
+ void endBlock(void);
+ /*! Append the instruction in the current basic block */
+ void append(const Instruction &insn);
+ Unit &unit; //!< The unit this context is associated with
+ Function *fn; //!< Current function we are processing
+ BasicBlock *bb; //!< Current basic block we are filling
+ static const uint8_t LABEL_IS_POINTED = 1 << 0; //!< A branch points to it
+ static const uint8_t LABEL_IS_DEFINED = 1 << 1; //!< A label instruction defines it
+ vector<uint8_t> *usedLabels; //!< Tracks how each label of the current function is used
+ /*! Functions can be defined recursively */
+ struct StackElem {
+ INLINE StackElem(Function *fn, BasicBlock *bb, vector<uint8_t> *usedLabels)
+ : fn(fn), bb(bb), usedLabels(usedLabels)
+ {}
+ Function *fn; //!< Function to process
+ BasicBlock *bb; //!< Basic block currently processed
+ vector<uint8_t> *usedLabels; //!< Store all labels that are defined
+ };
+ vector<StackElem> fnStack; //!< Stack of functions still to finish
+ GBE_CLASS(Context);
+ };
+
+ // Use argument checker to assert argument value correctness
+#define DECL_INSN(NAME, FAMILY) \
+ template <typename... Args> \
+ INLINE void Context::NAME(Args...args) { \
+ GBE_ASSERTM(fn != NULL, "No function currently defined"); \
+ const Instruction insn = gbe::ir::NAME(args...); \
+ this->append(insn); \
+ }
+#include "ir/instruction.hxx"
+#undef DECL_INSN
+
+} /* namespace ir */
+} /* namespace gbe */
+
+#endif /* __GBE_IR_CONTEXT_HPP__ */
+
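
The LOAD and STORE helpers above count their trailing register arguments at compile time with std::tuple_size over std::tuple<Args...> and pass the result as valueNum. A tiny self-contained sketch of the same trick (valueCount is a made-up name):

    #include <cstdint>
    #include <cstdio>
    #include <tuple>

    // Count the trailing arguments at compile time, the same way the LOAD/STORE
    // wrappers compute valueNum before forwarding to the tuple-based overload.
    template <typename... Args>
    static uint16_t valueCount(Args...) {
      return std::tuple_size<std::tuple<Args...>>::value;
    }

    int main() {
      std::printf("%u %u\n", unsigned(valueCount(1, 2, 3)), unsigned(valueCount(7.0f))); // 3 1
      return 0;
    }
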
diff --git a/backend/src/ir/function.cpp b/backend/src/ir/function.cpp
new file mode 100644
index 0000000..bce1b9a
--- /dev/null
+++ b/backend/src/ir/function.cpp
@@ -0,0 +1,316 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file function.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#include "ir/function.hpp"
+#include "ir/unit.hpp"
+#include "sys/map.hpp"
+
+namespace gbe {
+namespace ir {
+
+ ///////////////////////////////////////////////////////////////////////////
+ // PushLocation
+ ///////////////////////////////////////////////////////////////////////////
+
+ Register PushLocation::getRegister(void) const {
+ const Function::LocationMap &locationMap = fn.getLocationMap();
+ GBE_ASSERT(locationMap.contains(*this) == true);
+ return locationMap.find(*this)->second;
+ }
+
+ ///////////////////////////////////////////////////////////////////////////
+ // Function
+ ///////////////////////////////////////////////////////////////////////////
+
+ Function::Function(const std::string &name, const Unit &unit, Profile profile) :
+ name(name), unit(unit), profile(profile), simdWidth(0), useSLM(false)
+ {
+ initProfile(*this);
+ }
+
+ Function::~Function(void) {
+ for (auto block : blocks) GBE_DELETE(block);
+ for (auto arg : args) GBE_DELETE(arg);
+ }
+
+ RegisterFamily Function::getPointerFamily(void) const {
+ return unit.getPointerFamily();
+ }
+
+ void Function::sortLabels(void) {
+ uint32_t last = 0;
+
+ // Compute the new labels and patch the label instruction
+ map<LabelIndex, LabelIndex> labelMap;
+ foreachInstruction([&](Instruction &insn) {
+ if (insn.getOpcode() != OP_LABEL) return;
+
+ // Create the new label
+ const Instruction newLabel = LABEL(LabelIndex(last));
+
+ // Replace the previous label instruction
+ LabelInstruction &label = cast<LabelInstruction>(insn);
+ const LabelIndex index = label.getLabelIndex();
+ labelMap.insert(std::make_pair(index, LabelIndex(last++)));
+ newLabel.replace(&insn);
+ });
+
+ // Patch all branch instructions with the new labels
+ foreachInstruction([&](Instruction &insn) {
+ if (insn.getOpcode() != OP_BRA) return;
+
+ // Get the current branch instruction
+ BranchInstruction &bra = cast<BranchInstruction>(insn);
+ const LabelIndex index = bra.getLabelIndex();
+ const LabelIndex newIndex = labelMap.find(index)->second;
+
+ // Insert the patched branch instruction
+ if (bra.isPredicated() == true) {
+ const Instruction newBra = BRA(newIndex, bra.getPredicateIndex());
+ newBra.replace(&insn);
+ } else {
+ const Instruction newBra = BRA(newIndex);
+ newBra.replace(&insn);
+ }
+ });
+
+ // Reset the label to block mapping
+ this->labels.resize(last);
+ foreachBlock([&](BasicBlock &bb) {
+ const Instruction *first = bb.getFirstInstruction();
+ const LabelInstruction *label = cast<LabelInstruction>(first);
+ const LabelIndex index = label->getLabelIndex();
+ this->labels[index] = &bb;
+ });
+ }
+
+ LabelIndex Function::newLabel(void) {
+ GBE_ASSERTM(labels.size() < 0xffff,
+ "Too many labels are defined (65536 only are supported)");
+ const LabelIndex index(labels.size());
+ labels.push_back(NULL);
+ return index;
+ }
+
+ void Function::outImmediate(std::ostream &out, ImmediateIndex index) const {
+ GBE_ASSERT(index < immediates.size());
+ const Immediate imm = immediates[index];
+ switch (imm.type) {
+ case TYPE_BOOL: out << !!imm.data.u8; break;
+ case TYPE_S8: out << imm.data.s8; break;
+ case TYPE_U8: out << imm.data.u8; break;
+ case TYPE_S16: out << imm.data.s16; break;
+ case TYPE_U16: out << imm.data.u16; break;
+ case TYPE_S32: out << imm.data.s32; break;
+ case TYPE_U32: out << imm.data.u32; break;
+ case TYPE_S64: out << imm.data.s64; break;
+ case TYPE_U64: out << imm.data.u64; break;
+ case TYPE_HALF: out << "half(" << imm.data.u16 << ")"; break;
+ case TYPE_FLOAT: out << imm.data.f32; break;
+ case TYPE_DOUBLE: out << imm.data.f64; break;
+ }
+ }
+
+ uint32_t Function::getLargestBlockSize(void) const {
+ uint32_t insnNum = 0;
+ foreachBlock([&insnNum](const ir::BasicBlock &bb) {
+ insnNum = std::max(insnNum, uint32_t(bb.size()));
+ });
+ return insnNum;
+ }
+
+ uint32_t Function::getFirstSpecialReg(void) const {
+ return this->profile == PROFILE_OCL ? 0u : ~0u;
+ }
+
+ uint32_t Function::getSpecialRegNum(void) const {
+ return this->profile == PROFILE_OCL ? ocl::regNum : ~0u;
+ }
+
+ bool Function::isEntryBlock(const BasicBlock &bb) const {
+ if (this->blockNum() == 0)
+ return false;
+ else
+ return &bb == this->blocks[0];
+ }
+
+ const BasicBlock &Function::getTopBlock(void) const {
+ GBE_ASSERT(blockNum() > 0 && blocks[0] != NULL);
+ return *blocks[0];
+ }
+
+ const BasicBlock &Function::getBottomBlock(void) const {
+ const uint32_t n = blockNum();
+ GBE_ASSERT(n > 0 && blocks[n-1] != NULL);
+ return *blocks[n-1];
+ }
+
+ BasicBlock &Function::getBottomBlock(void) {
+ const uint32_t n = blockNum();
+ GBE_ASSERT(n > 0 && blocks[n-1] != NULL);
+ return *blocks[n-1];
+ }
+
+ const BasicBlock &Function::getBlock(LabelIndex label) const {
+ GBE_ASSERT(label < labelNum() && labels[label] != NULL);
+ return *labels[label];
+ }
+
+ const LabelInstruction *Function::getLabelInstruction(LabelIndex index) const {
+ const BasicBlock *bb = this->labels[index];
+ const Instruction *first = bb->getFirstInstruction();
+ return cast<LabelInstruction>(first);
+ }
+
+ /*! Indicate if the given register is a special one (like localID in OCL) */
+ bool Function::isSpecialReg(const Register ®) const {
+ const uint32_t ID = uint32_t(reg);
+ const uint32_t firstID = this->getFirstSpecialReg();
+ const uint32_t specialNum = this->getSpecialRegNum();
+ return ID >= firstID && ID < firstID + specialNum;
+ }
+
+ void Function::computeCFG(void) {
+ // Clear possible previously computed CFG and compute the direct
+ // predecessors and successors
+ BasicBlock *prev = NULL;
+ this->foreachBlock([this, &prev](BasicBlock &bb) {
+ bb.successors.clear();
+ bb.predecessors.clear();
+ if (prev != NULL) {
+ prev->nextBlock = &bb;
+ bb.prevBlock = prev;
+ }
+ prev = &bb;
+ });
+
+ // Update it. Do not forget that a branch can also jump to the next block
+ BasicBlock *jumpToNext = NULL;
+ this->foreachBlock([this, &jumpToNext](BasicBlock &bb) {
+ if (jumpToNext) {
+ jumpToNext->successors.insert(&bb);
+ bb.predecessors.insert(jumpToNext);
+ jumpToNext = NULL;
+ }
+ if (bb.size() == 0) return;
+ Instruction *last = bb.getLastInstruction();
+ if (last->isMemberOf<BranchInstruction>() == false) {
+ jumpToNext = &bb;
+ return;
+ }
+ const BranchInstruction &insn = cast<BranchInstruction>(*last);
+ if (insn.getOpcode() == OP_BRA) {
+ const LabelIndex label = insn.getLabelIndex();
+ BasicBlock *target = this->blocks[label];
+ GBE_ASSERT(target != NULL);
+ target->predecessors.insert(&bb);
+ bb.successors.insert(target);
+ if (insn.isPredicated() == true) jumpToNext = &bb;
+ }
+ });
+ }
+
+ std::ostream &operator<< (std::ostream &out, const Function &fn)
+ {
+ out << ".decl_function " << fn.getName() << std::endl;
+ out << fn.getRegisterFile();
+ out << "## " << fn.argNum() << " input register"
+ << (fn.argNum() ? "s" : "") << " ##" << std::endl;
+ for (uint32_t i = 0; i < fn.argNum(); ++i) {
+ const FunctionArgument &input = fn.getArg(i);
+ out << "decl_input.";
+ switch (input.type) {
+ case FunctionArgument::GLOBAL_POINTER: out << "global"; break;
+ case FunctionArgument::LOCAL_POINTER: out << "local"; break;
+ case FunctionArgument::CONSTANT_POINTER: out << "constant"; break;
+ case FunctionArgument::VALUE: out << "value"; break;
+ case FunctionArgument::STRUCTURE:
+ out << "structure." << input.size;
+ break;
+ default: break;
+ }
+ out << " %" << input.reg << std::endl;
+ }
+ out << "## " << fn.outputNum() << " output register"
+ << (fn.outputNum() ? "s" : "") << " ##" << std::endl;
+ for (uint32_t i = 0; i < fn.outputNum(); ++i)
+ out << "decl_output %" << fn.getOutput(i) << std::endl;
+ out << "## " << fn.pushedNum() << " pushed register" << std::endl;
+ const Function::PushMap &pushMap = fn.getPushMap();
+ for (const auto &pushed : pushMap) {
+ out << "decl_pushed %" << pushed.first
+ << " @{" << pushed.second.argID << ","
+ << pushed.second.offset << "}" << std::endl;
+ }
+ out << "## " << fn.blockNum() << " block"
+ << (fn.blockNum() ? "s" : "") << " ##" << std::endl;
+ fn.foreachBlock([&](const BasicBlock &bb) {
+ const_cast<BasicBlock&>(bb).foreach([&out] (const Instruction &insn) {
+ out << insn << std::endl;
+ });
+ out << std::endl;
+ });
+ out << ".end_function" << std::endl;
+ return out;
+ }
+
+ ///////////////////////////////////////////////////////////////////////////
+ // Basic Block
+ ///////////////////////////////////////////////////////////////////////////
+
+ BasicBlock::BasicBlock(Function &fn) : fn(fn) {
+ this->nextBlock = this->prevBlock = NULL;
+ }
+
+ BasicBlock::~BasicBlock(void) {
+ this->foreach([this] (Instruction &insn) {
+ this->fn.deleteInstruction(&insn);
+ });
+ }
+
+ void BasicBlock::append(Instruction &insn) {
+ insn.setParent(this);
+ this->push_back(&insn);
+ }
+
+ Instruction *BasicBlock::getFirstInstruction(void) const {
+ GBE_ASSERT(this->begin() != this->end());
+ const Instruction &insn = *this->begin();
+ return const_cast<Instruction*>(&insn);
+ }
+
+ Instruction *BasicBlock::getLastInstruction(void) const {
+ GBE_ASSERT(this->begin() != this->end());
+ const Instruction &insn = *(--this->end());
+ return const_cast<Instruction*>(&insn);
+ }
+
+ LabelIndex BasicBlock::getLabelIndex(void) const {
+ const Instruction *first = this->getFirstInstruction();
+ const LabelInstruction *label = cast<LabelInstruction>(first);
+ return label->getLabelIndex();
+ }
+
+} /* namespace ir */
+} /* namespace gbe */
+
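
Function::computeCFG adds a fall-through edge to the next block whenever a block ends without a branch or with a predicated branch, and a branch edge to the target block otherwise. The sketch below models only those two rules on a toy block list; the Block struct and its fields are invented for illustration and are much simpler than the real BasicBlock.

    #include <cstdio>
    #include <set>
    #include <vector>

    // Toy model of a block terminator: an optional branch target plus a predicate flag.
    struct Block {
      int branchTarget = -1;   // -1 means no terminating branch
      bool predicated = false; // predicated branches may also fall through
      std::set<int> successors;
    };

    // Mirrors the two rules of Function::computeCFG: a branch edge to the target,
    // and a fall-through edge to the next block when execution can reach it.
    static void computeCFG(std::vector<Block> &blocks) {
      for (size_t i = 0; i < blocks.size(); ++i) {
        Block &bb = blocks[i];
        if (bb.branchTarget >= 0) bb.successors.insert(bb.branchTarget);
        const bool fallsThrough = bb.branchTarget < 0 || bb.predicated;
        if (fallsThrough && i + 1 < blocks.size()) bb.successors.insert(int(i + 1));
      }
    }

    int main() {
      std::vector<Block> blocks(3);
      blocks[0].branchTarget = 2; blocks[0].predicated = true; // may skip block 1
      computeCFG(blocks);
      for (size_t i = 0; i < blocks.size(); ++i) {
        std::printf("block %zu ->", i);
        for (int s : blocks[i].successors) std::printf(" %d", s);
        std::printf("\n");
      }
      return 0;
    }
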
diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp
new file mode 100644
index 0000000..9b73f54
--- /dev/null
+++ b/backend/src/ir/function.hpp
@@ -0,0 +1,307 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file function.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#ifndef __GBE_IR_FUNCTION_HPP__
+#define __GBE_IR_FUNCTION_HPP__
+
+#include "ir/immediate.hpp"
+#include "ir/register.hpp"
+#include "ir/instruction.hpp"
+#include "ir/profile.hpp"
+#include "sys/vector.hpp"
+#include "sys/set.hpp"
+#include "sys/map.hpp"
+#include "sys/alloc.hpp"
+
+#include <ostream>
+
+namespace gbe {
+namespace ir {
+
+ /*! Commonly used in the CFG */
+ typedef set<BasicBlock*> BlockSet;
+ class Unit; // Function belongs to a unit
+
+ /*! Function basic blocks really belong to a function since:
+ * 1 - registers used in the basic blocks belong to the function register
+ * file
+ * 2 - branches point to basic blocks of the same function
+ */
+ class BasicBlock : public NonCopyable, public intrusive_list<Instruction>
+ {
+ public:
+ /*! Empty basic block */
+ BasicBlock(Function &fn);
+ /*! Releases all the instructions */
+ ~BasicBlock(void);
+ /*! Append a new instruction at the end of the stream */
+ void append(Instruction &insn);
+ /*! Get the parent function */
+ Function &getParent(void) { return fn; }
+ const Function &getParent(void) const { return fn; }
+ /*! Get the next and previous allocated block */
+ BasicBlock *getNextBlock(void) const { return this->nextBlock; }
+ BasicBlock *getPrevBlock(void) const { return this->prevBlock; }
+ /*! Get / set the first and last instructions */
+ Instruction *getFirstInstruction(void) const;
+ Instruction *getLastInstruction(void) const;
+ /*! Get successors and predecessors */
+ const BlockSet &getSuccessorSet(void) const { return successors; }
+ const BlockSet &getPredecessorSet(void) const { return predecessors; }
+ /*! Get the label index of this block */
+ LabelIndex getLabelIndex(void) const;
+ /*! Apply the given functor on all instructions */
+ template <typename T>
+ INLINE void foreach(const T &functor) {
+ auto it = this->begin();
+ while (it != this->end()) {
+ auto curr = it++;
+ functor(*curr);
+ }
+ }
+ private:
+ friend class Function; //!< Owns the basic blocks
+ BlockSet predecessors; //!< Incoming blocks
+ BlockSet successors; //!< Outgoing blocks
+ BasicBlock *nextBlock; //!< Block allocated just after this one
+ BasicBlock *prevBlock; //!< Block allocated just before this one
+ Function &fn; //!< Function the block belongs to
+ GBE_CLASS(BasicBlock);
+ };
+
+ /*! Ultimately, function input arguments can be pushed from the constant
+ * buffer if they are structures. Other arguments can be images (textures)
+ * and will also require special treatment.
+ */
+ struct FunctionArgument {
+ enum Type {
+ GLOBAL_POINTER = 0, // __global
+ CONSTANT_POINTER = 1, // __constant
+ LOCAL_POINTER = 2, // __local
+ VALUE = 3, // int, float
+ STRUCTURE = 4, // struct foo
+ IMAGE = 5 // image*d_t
+ };
+ /*! Create a function input argument */
+ INLINE FunctionArgument(Type type, Register reg, uint32_t size) :
+ type(type), reg(reg), size(size) {}
+ Type type; //!< Gives the type of argument we have
+ Register reg; //!< Holds the argument
+ uint32_t size; //!< == sizeof(void*) for ptr, sizeof(elem) for the rest
+ GBE_STRUCT(FunctionArgument); // Use custom allocator
+ };
+
+ /*! Maps the pushed register to the function argument */
+ struct PushLocation {
+ INLINE PushLocation(const Function &fn, uint32_t argID, uint32_t offset) :
+ fn(fn), argID(argID), offset(offset) {}
+ /*! Get the pushed virtual register */
+ Register getRegister(void) const;
+ const Function &fn; //!< Function it belongs to
+ uint32_t argID; //!< Function argument
+ uint32_t offset; //!< Offset in the function argument
+ GBE_STRUCT(PushLocation); // Use custom allocator
+ };
+
+ /*! For maps and sets */
+ INLINE bool operator< (const PushLocation &arg0, const PushLocation &arg1) {
+ if (arg0.argID != arg1.argID) return arg0.argID < arg1.argID;
+ return arg0.offset < arg1.offset;
+ }
+
+ /*! A function is:
+ * - a register file
+ * - a set of basic blocks laid out into a CFG
+ * - input arguments
+ */
+ class Function : public NonCopyable
+ {
+ public:
+ /*! Map of all pushed registers */
+ typedef map<Register, PushLocation> PushMap;
+ /*! Map of all pushed locations (i.e. parts of function arguments) */
+ typedef map<PushLocation, Register> LocationMap;
+ /*! Create an empty function */
+ Function(const std::string &name, const Unit &unit, Profile profile = PROFILE_OCL);
+ /*! Release everything *including* the basic block pointers */
+ ~Function(void);
+ /*! Get the function profile */
+ INLINE Profile getProfile(void) const { return profile; }
+ /*! Get a new valid register */
+ INLINE Register newRegister(RegisterFamily family) {
+ return this->file.append(family);
+ }
+ /*! Get the function name */
+ const std::string &getName(void) const { return name; }
+ /*! When set, the back end no longer has any choice for it */
+ INLINE void setSimdWidth(uint32_t width) { simdWidth = width; }
+ /*! Get the SIMD width (0 if not forced) */
+ uint32_t getSimdWidth(void) const { return simdWidth; }
+ /*! Extract the register from the register file */
+ INLINE RegisterData getRegisterData(Register reg) const { return file.get(reg); }
+ /*! Get the register family from the register itself */
+ INLINE RegisterFamily getRegisterFamily(Register reg) const {
+ return this->getRegisterData(reg).family;
+ }
+ /*! Get the register from the tuple vector */
+ INLINE Register getRegister(Tuple ID, uint32_t which) const {
+ return file.get(ID, which);
+ }
+ /*! Set the register from the tuple vector */
+ INLINE void setRegister(Tuple ID, uint32_t which, Register reg) {
+ file.set(ID, which, reg);
+ }
+ /*! Get the register file */
+ INLINE const RegisterFile &getRegisterFile(void) const { return file; }
+ /*! Get the given value (i.e. immediate) from the function */
+ INLINE Immediate getImmediate(ImmediateIndex ID) const {
+ return immediates[ID];
+ }
+ /*! Create a new immediate and returns its index */
+ INLINE ImmediateIndex newImmediate(const Immediate &imm) {
+ const ImmediateIndex index(this->immediateNum());
+ this->immediates.push_back(imm);
+ return index;
+ }
+ /*! Fast allocation / deallocation of instructions */
+ DECL_POOL(Instruction, insnPool);
+ /*! Get input argument */
+ INLINE const FunctionArgument &getArg(uint32_t ID) const {
+ GBE_ASSERT(args[ID] != NULL);
+ return *args[ID];
+ }
+ INLINE FunctionArgument &getArg(uint32_t ID) {
+ GBE_ASSERT(args[ID] != NULL);
+ return *args[ID];
+ }
+ /*! Get the number of pushed registers */
+ INLINE uint32_t pushedNum(void) const { return pushMap.size(); }
+ /*! Get the pushed data location for the given register */
+ INLINE const PushLocation *getPushLocation(Register reg) const {
+ auto it = pushMap.find(reg);
+ if (it == pushMap.end())
+ return NULL;
+ else
+ return &it->second;
+ }
+ /*! Get the map of pushed registers */
+ const PushMap &getPushMap(void) const { return this->pushMap; }
+ /*! Get the map of pushed locations */
+ const LocationMap &getLocationMap(void) const { return this->locationMap; }
+ /*! Get input argument from the register (linear search). Return NULL if
+ * this is not an input argument
+ */
+ INLINE const FunctionArgument *getArg(const Register ®) const {
+ for (auto arg : args) if (arg->reg == reg) return arg;
+ return NULL;
+ }
+ /*! Get output register */
+ INLINE Register getOutput(uint32_t ID) const { return outputs[ID]; }
+ /*! Get the argument location for the pushed register */
+ INLINE const PushLocation &getPushLocation(Register reg) {
+ GBE_ASSERT(pushMap.contains(reg) == true);
+ return pushMap.find(reg)->second;
+ }
+ /*! Says if this is the top basic block (entry point) */
+ bool isEntryBlock(const BasicBlock &bb) const;
+ /*! Get the function entry point block */
+ const BasicBlock &getTopBlock(void) const;
+ /*! Get the last block */
+ const BasicBlock &getBottomBlock(void) const;
+ /*! Get the last block */
+ BasicBlock &getBottomBlock(void);
+ /*! Get block from its label */
+ const BasicBlock &getBlock(LabelIndex label) const;
+ /*! Get the label instruction from its label index */
+ const LabelInstruction *getLabelInstruction(LabelIndex index) const;
+ /*! Return the number of instructions of the largest basic block */
+ uint32_t getLargestBlockSize(void) const;
+ /*! Get the first index of the special registers and number of them */
+ uint32_t getFirstSpecialReg(void) const;
+ uint32_t getSpecialRegNum(void) const;
+ /*! Indicate if the given register is a special one (like localID in OCL) */
+ bool isSpecialReg(const Register ®) const;
+ /*! Create a new label (still not bound to a basic block) */
+ LabelIndex newLabel(void);
+ /*! Create the control flow graph */
+ void computeCFG(void);
+ /*! Sort labels in increasing order (top block has the smallest label) */
+ void sortLabels(void);
+ /*! Get the pointer family */
+ RegisterFamily getPointerFamily(void) const;
+ /*! Number of registers in the register file */
+ INLINE uint32_t regNum(void) const { return file.regNum(); }
+ /*! Number of register tuples in the register file */
+ INLINE uint32_t tupleNum(void) const { return file.tupleNum(); }
+ /*! Number of labels in the function */
+ INLINE uint32_t labelNum(void) const { return labels.size(); }
+ /*! Number of immediate values in the function */
+ INLINE uint32_t immediateNum(void) const { return immediates.size(); }
+ /*! Get the number of argument registers */
+ INLINE uint32_t argNum(void) const { return args.size(); }
+ /*! Get the number of output registers */
+ INLINE uint32_t outputNum(void) const { return outputs.size(); }
+ /*! Number of blocks in the function */
+ INLINE uint32_t blockNum(void) const { return blocks.size(); }
+ /*! Output an immediate value in a stream */
+ void outImmediate(std::ostream &out, ImmediateIndex index) const;
+ /*! Apply the given functor on all basic blocks */
+ template <typename T>
+ INLINE void foreachBlock(const T &functor) const {
+ for (auto block : blocks) functor(*block);
+ }
+ /*! Apply the given functor on all instructions */
+ template <typename T>
+ INLINE void foreachInstruction(const T &functor) const {
+ for (auto block : blocks) block->foreach(functor);
+ }
+ /*! Does it use SLM */
+ INLINE bool getUseSLM(void) const { return this->useSLM; }
+ /*! Change the SLM config for the function */
+ INLINE bool setUseSLM(bool useSLM) { return this->useSLM = useSLM; }
+ private:
+ friend class Context; //!< Can freely modify a function
+ std::string name; //!< Function name
+ const Unit &unit; //!< Function belongs to this unit
+ vector<FunctionArgument*> args; //!< Input registers of the function
+ vector<Register> outputs; //!< Output registers of the function
+ vector<BasicBlock*> labels; //!< Each label points to a basic block
+ vector<Immediate> immediates; //!< All immediate values in the function
+ vector<BasicBlock*> blocks; //!< All chained basic blocks
+ RegisterFile file; //!< RegisterDatas used by the instructions
+ Profile profile; //!< Current function profile
+ PushMap pushMap; //!< Pushed function arguments (reg->loc)
+ LocationMap locationMap; //!< Pushed function arguments (loc->reg)
+ uint32_t simdWidth; //!< 8 or 16 if forced, 0 otherwise
+ bool useSLM; //!< Is SLM required?
+ GBE_CLASS(Function); //!< Use custom allocator
+ };
+
+ /*! Output the function string in the given stream */
+ std::ostream &operator<< (std::ostream &out, const Function &fn);
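+
+ /* A minimal usage sketch of the Function API above (illustrative only, not part
+ * of the upstream sources). It assumes a Function `fn` already exists and that
+ * register families such as FAMILY_BOOL are declared in ir/register.hpp:
+ *
+ * const Register flag = fn.newRegister(FAMILY_BOOL); // fresh virtual register
+ * const ImmediateIndex one = fn.newImmediate(Immediate(1.0f)); // stored per function
+ * const Immediate imm = fn.getImmediate(one); // read it back by index
+ * const LabelIndex entry = fn.newLabel(); // label not yet bound to a block
+ */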
+
+} /* namespace ir */
+} /* namespace gbe */
+
+#endif /* __GBE_IR_FUNCTION_HPP__ */
+
diff --git a/backend/src/ir/immediate.hpp b/backend/src/ir/immediate.hpp
new file mode 100644
index 0000000..67dd03f
--- /dev/null
+++ b/backend/src/ir/immediate.hpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file immediate.hpp
+ *
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#ifndef __GBE_IR_IMMEDIATE_HPP__
+#define __GBE_IR_IMMEDIATE_HPP__
+
+#include "ir/type.hpp"
+#include "sys/platform.hpp"
+
+namespace gbe {
+namespace ir {
+
+ /*! The value as stored in the instruction */
+ class Immediate
+ {
+ public:
+ INLINE Immediate(void) {}
+#define DECL_CONSTRUCTOR(TYPE, FIELD, IR_TYPE) \
+ Immediate(TYPE FIELD) { \
+ this->type = IR_TYPE; \
+ this->data.u64 = 0llu; \
+ this->data.FIELD = FIELD; \
+ }
+ DECL_CONSTRUCTOR(bool, b, TYPE_BOOL)
+ DECL_CONSTRUCTOR(int8_t, s8, TYPE_S8)
+ DECL_CONSTRUCTOR(uint8_t, u8, TYPE_U8)
+ DECL_CONSTRUCTOR(int16_t, s16, TYPE_S16)
+ DECL_CONSTRUCTOR(uint16_t, u16, TYPE_U16)
+ DECL_CONSTRUCTOR(int32_t, s32, TYPE_S32)
+ DECL_CONSTRUCTOR(uint32_t, u32, TYPE_U32)
+ DECL_CONSTRUCTOR(int64_t, s64, TYPE_S64)
+ DECL_CONSTRUCTOR(uint64_t, u64, TYPE_U64)
+ DECL_CONSTRUCTOR(float, f32, TYPE_FLOAT)
+ DECL_CONSTRUCTOR(double, f64, TYPE_DOUBLE)
+#undef DECL_CONSTRUCTOR
+ union {
+ bool b;
+ int8_t s8;
+ uint8_t u8;
+ int16_t s16;
+ uint16_t u16;
+ int32_t s32;
+ uint32_t u32;
+ int64_t s64;
+ uint64_t u64;
+ float f32;
+ double f64;
+ } data; //!< Value to store
+ Type type; //!< Type of the value
+ GBE_CLASS(Immediate);
+ };
+
+ /*! Compare two immediates */
+ INLINE bool operator< (const Immediate &imm0, const Immediate &imm1) {
+ if (imm0.type != imm1.type)
+ return uint32_t(imm0.type) < uint32_t(imm1.type);
+ return imm0.data.u64 < imm1.data.u64;
+ }
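+
+ /* A minimal illustration of the ordering above (illustrative only): because the
+ * union is zeroed before the typed field is written, immediates compare
+ * lexicographically on (type, raw 64-bit payload). For instance:
+ * Immediate a(0.0f), b(1.0f);
+ * // a < b is true: same TYPE_FLOAT, and 0x0 < 0x3f800000 in data.u64
+ * Immediate c(true);
+ * // c compares against a and b purely by the Type enum value, not the payload
+ */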
+
+ /*! A value is stored in a per-function vector. This is the index to it */
+ TYPE_SAFE(ImmediateIndex, uint16_t)
+
+} /* namespace ir */
+} /* namespace gbe */
+
+#endif /* __GBE_IR_IMMEDIATE_HPP__ */
+
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
new file mode 100644
index 0000000..2d5a3f1
--- /dev/null
+++ b/backend/src/ir/instruction.cpp
@@ -0,0 +1,1254 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file instruction.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#include "ir/instruction.hpp"
+#include "ir/function.hpp"
+
+namespace gbe {
+namespace ir {
+
+ ///////////////////////////////////////////////////////////////////////////
+ // Concrete implementations of the instruction classes. We
+ // cast an instruction to an internal class to run the given member function
+ ///////////////////////////////////////////////////////////////////////////
+ namespace internal
+ {
+#define ALIGNED_INSTRUCTION ALIGNED(AlignOf<Instruction>::value)
+
+ /*! Policy shared by all the internal instructions */
+ struct BasePolicy {
+ /*! Create an instruction from its internal representation */
+ Instruction convert(void) const {
+ return Instruction(reinterpret_cast<const char *>(&this->opcode));
+ }
+ /*! Output the opcode in the given stream */
+ INLINE void outOpcode(std::ostream &out) const {
+ switch (opcode) {
+#define DECL_INSN(OPCODE, CLASS) case OP_##OPCODE: out << #OPCODE; break;
+#include "instruction.hxx"
+#undef DECL_INSN
+ case OP_INVALID: NOT_SUPPORTED; break;
+ };
+ }
+
+ /*! Instruction opcode */
+ Opcode opcode;
+ };
+
+ /*! For regular n-source instructions */
+ template <typename T, uint32_t srcNum>
+ struct NSrcPolicy {
+ INLINE uint32_t getSrcNum(void) const { return srcNum; }
+ INLINE Register getSrc(const Function &fn, uint32_t ID) const {
+ GBE_ASSERTM((int) ID < (int) srcNum, "Out-of-bound source");
+ return static_cast<const T*>(this)->src[ID];
+ }
+ INLINE void setSrc(Function &fn, uint32_t ID, Register reg) {
+ GBE_ASSERTM((int) ID < (int) srcNum, "Out-of-bound source");
+ static_cast<T*>(this)->src[ID] = reg;
+ }
+ };
+
+ /*! For regular n-destination instructions */
+ template <typename T, uint32_t dstNum>
+ struct NDstPolicy {
+ INLINE uint32_t getDstNum(void) const { return dstNum; }
+ INLINE Register getDst(const Function &fn, uint32_t ID) const {
+ GBE_ASSERTM((int) ID < (int) dstNum, "Out-of-bound destination");
+ return static_cast<const T*>(this)->dst[ID];
+ }
+ INLINE void setDst(Function &fn, uint32_t ID, Register reg) {
+ GBE_ASSERTM((int) ID < (int) dstNum, "Out-of-bound destination");
+ static_cast<T*>(this)->dst[ID] = reg;
+ }
+ };
+
+ /*! For instructions that use a tuple for source */
+ template <typename T>
+ struct TupleSrcPolicy {
+ INLINE uint32_t getSrcNum(void) const {
+ return static_cast<const T*>(this)->srcNum;
+ }
+ INLINE Register getSrc(const Function &fn, uint32_t ID) const {
+ GBE_ASSERTM(ID < static_cast<const T*>(this)->srcNum, "Out-of-bound source register");
+ return fn.getRegister(static_cast<const T*>(this)->src, ID);
+ }
+ INLINE void setSrc(Function &fn, uint32_t ID, Register reg) {
+ GBE_ASSERTM(ID < static_cast<const T*>(this)->srcNum, "Out-of-bound source register");
+ return fn.setRegister(static_cast<T*>(this)->src, ID, reg);
+ }
+ };
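+
+ /* Illustrative note (not upstream code): the three policy structs above are
+ * CRTP mix-ins. Each instruction class passes itself as T so the policy can
+ * reach its storage. For example, NaryInstruction<1> inherits
+ * NSrcPolicy<NaryInstruction<1>, 1>, so getSrc(fn, 0) resolves to:
+ * return static_cast<const NaryInstruction<1>*>(this)->src[0];
+ * without any virtual dispatch.
+ */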
+
+ /*! All unary and binary arithmetic instructions */
+ template <uint32_t srcNum> // 1 or 2
+ class ALIGNED_INSTRUCTION NaryInstruction :
+ public BasePolicy,
+ public NSrcPolicy<NaryInstruction<srcNum>, srcNum>,
+ public NDstPolicy<NaryInstruction<1>, 1>
+ {
+ public:
+ INLINE Type getType(void) const { return this->type; }
+ INLINE bool wellFormed(const Function &fn, std::string &whyNot) const;
+ INLINE void out(std::ostream &out, const Function &fn) const;
+ Type type; //!< Type of the instruction
+ Register dst[1]; //!< Index of the register in the register file
+ Register src[srcNum]; //!< Indices of the sources
+ };
+
+ /*! All 1-source arithmetic instructions */
+ class ALIGNED_INSTRUCTION UnaryInstruction : public NaryInstruction<1>
+ {
+ public:
+ UnaryInstruction(Opcode opcode, Type type, Register dst, Register src) {
+ this->opcode = opcode;
+ this->type = type;
+ this->dst[0] = dst;
+ this->src[0] = src;
+ }
+ };
+
+ /*! All 2-source arithmetic instructions */
+ class ALIGNED_INSTRUCTION BinaryInstruction : public NaryInstruction<2>
+ {
+ public:
+ BinaryInstruction(Opcode opcode,
+ Type type,
+ Register dst,
+ Register src0,
+ Register src1) {
+ this->opcode = opcode;
+ this->type = type;
+ this->dst[0] = dst;
+ this->src[0] = src0;
+ this->src[1] = src1;
+ }
+ INLINE bool commutes(void) const {
+ switch (opcode) {
+ case OP_ADD:
+ case OP_XOR:
+ case OP_OR:
+ case OP_AND:
+ case OP_MUL:
+ return true;
+ default:
+ return false;
+ }
+ }
+ };
+
+ /*! Three sources mean we need a tuple to encode them */
+ class ALIGNED_INSTRUCTION SelectInstruction :
+ public BasePolicy,
+ public NDstPolicy<SelectInstruction, 1>,
+ public TupleSrcPolicy<SelectInstruction>
+ {
+ public:
+ SelectInstruction(Type type, Register dst, Tuple src) {
+ this->opcode = OP_SEL;
+ this->type = type;
+ this->dst[0] = dst;
+ this->src = src;
+ }
+ INLINE Type getType(void) const { return this->type; }
+ INLINE bool wellFormed(const Function &fn, std::string &whyNot) const;
+ INLINE void out(std::ostream &out, const Function &fn) const;
+ Type type; //!< Type of the instruction
+ Register dst[1]; //!< Dst is the register index
+ Tuple src; //!< 3 sources do not fit in 8 bytes -> use a tuple
+ static const uint32_t srcNum = 3;
+ };
+
+ /*! Comparison instructions take two sources of the same type and return a
+ * boolean value. Since it is pretty similar to binary instruction, we
+ * steal all the methods from it, except wellFormed (dst register is always
+ * a boolean value)
+ */
+ class ALIGNED_INSTRUCTION CompareInstruction :
+ public NaryInstruction<2>
+ {
+ public:
+ CompareInstruction(Opcode opcode,
+ Type type,
+ Register dst,
+ Register src0,
+ Register src1)
+ {
+ this->opcode = opcode;
+ this->type = type;
+ this->dst[0] = dst;
+ this->src[0] = src0;
+ this->src[1] = src1;
+ }
+ INLINE bool wellFormed(const Function &fn, std::string &whyNot) const;
+ };
+
+ class ALIGNED_INSTRUCTION ConvertInstruction :
+ public BasePolicy,
+ public NDstPolicy<ConvertInstruction, 1>,
+ public NSrcPolicy<ConvertInstruction, 1>
+ {
+ public:
+ ConvertInstruction(Type dstType,
+ Type srcType,
+ Register dst,
+ Register src)
+ {
+ this->opcode = OP_CVT;
+ this->dst[0] = dst;
+ this->src[0] = src;
+ this->dstType = dstType;
+ this->srcType = srcType;
+ }
+ INLINE Type getSrcType(void) const { return this->srcType; }
+ INLINE Type getDstType(void) const { return this->dstType; }
+ INLINE bool wellFormed(const Function &fn, std::string &whyNot) const;
+ INLINE void out(std::ostream &out, const Function &fn) const;
+ Register dst[1];
+ Register src[1];
+ Type dstType; //!< Type to convert to
+ Type srcType; //!< Type to convert from
+ };
+
+ class ALIGNED_INSTRUCTION BranchInstruction :
+ public BasePolicy,
+ public NDstPolicy<BranchInstruction, 0>
+ {
+ public:
+ INLINE BranchInstruction(Opcode op, LabelIndex labelIndex, Register predicate) {
+ GBE_ASSERT(op == OP_BRA);
+ this->opcode = op;
+ this->predicate = predicate;
+ this->labelIndex = labelIndex;
+ this->hasPredicate = true;
+ this->hasLabel = true;
+ }
+ INLINE BranchInstruction(Opcode op, LabelIndex labelIndex) {
+ GBE_ASSERT(op == OP_BRA);
+ this->opcode = OP_BRA;
+ this->labelIndex = labelIndex;
+ this->hasPredicate = false;
+ this->hasLabel = true;
+ }
+ INLINE BranchInstruction(Opcode op) {
+ GBE_ASSERT(op == OP_RET);
+ this->opcode = OP_RET;
+ this->hasPredicate = false;
+ this->hasLabel = false;
+ }
+ INLINE LabelIndex getLabelIndex(void) const {
+ GBE_ASSERTM(hasLabel, "No target label for this branch instruction");
+ return labelIndex;
+ }
+ INLINE uint32_t getSrcNum(void) const { return hasPredicate ? 1 : 0; }
+ INLINE Register getSrc(const Function &fn, uint32_t ID) const {
+ GBE_ASSERTM(hasPredicate, "No source for unpredicated branches");
+ GBE_ASSERTM(ID == 0, "Only one source for the branch instruction");
+ return predicate;
+ }
+ INLINE void setSrc(Function &fn, uint32_t ID, Register reg) {
+ GBE_ASSERTM(hasPredicate, "No source for unpredicated branches");
+ GBE_ASSERTM(ID == 0, "Only one source for the branch instruction");
+ predicate = reg;
+ }
+ INLINE bool isPredicated(void) const { return hasPredicate; }
+ INLINE bool wellFormed(const Function &fn, std::string &why) const;
+ INLINE void out(std::ostream &out, const Function &fn) const;
+ Register predicate; //!< Predication means conditional branch
+ LabelIndex labelIndex; //!< Index of the label the branch targets
+ bool hasPredicate:1; //!< Is it predicated?
+ bool hasLabel:1; //!< Is there any target label?
+ Register dst[]; //!< No destination
+ };
+
+ class ALIGNED_INSTRUCTION LoadInstruction :
+ public BasePolicy,
+ public NSrcPolicy<LoadInstruction, 1>
+ {
+ public:
+ LoadInstruction(Type type,
+ Tuple dstValues,
+ Register offset,
+ AddressSpace addrSpace,
+ uint32_t valueNum,
+ bool dwAligned)
+ {
+ GBE_ASSERT(valueNum < 128);
+ this->opcode = OP_LOAD;
+ this->type = type;
+ this->offset = offset;
+ this->values = dstValues;
+ this->addrSpace = addrSpace;
+ this->valueNum = valueNum;
+ this->dwAligned = dwAligned ? 1 : 0;
+ }
+ INLINE Register getDst(const Function &fn, uint32_t ID) const {
+ GBE_ASSERTM(ID < valueNum, "Out-of-bound source register");
+ return fn.getRegister(values, ID);
+ }
+ INLINE void setDst(Function &fn, uint32_t ID, Register reg) {
+ GBE_ASSERTM(ID < valueNum, "Out-of-bound source register");
+ fn.setRegister(values, ID, reg);
+ }
+ INLINE uint32_t getDstNum(void) const { return valueNum; }
+ INLINE Type getValueType(void) const { return type; }
+ INLINE uint32_t getValueNum(void) const { return valueNum; }
+ INLINE AddressSpace getAddressSpace(void) const { return addrSpace; }
+ INLINE bool wellFormed(const Function &fn, std::string &why) const;
+ INLINE void out(std::ostream &out, const Function &fn) const;
+ INLINE bool isAligned(void) const { return !!dwAligned; }
+ Type type; //!< Type to store
+ Register src[]; //!< Address where to load from
+ Register offset; //!< Alias to make it similar to store
+ Tuple values; //!< Values to load
+ AddressSpace addrSpace; //!< Where to load
+ uint8_t valueNum:7; //!< Number of values to load
+ uint8_t dwAligned:1; //!< DWORD aligned is what matters with GEN
+ };
+
+ class ALIGNED_INSTRUCTION StoreInstruction :
+ public BasePolicy, public NDstPolicy<StoreInstruction, 0>
+ {
+ public:
+ StoreInstruction(Type type,
+ Tuple values,
+ Register offset,
+ AddressSpace addrSpace,
+ uint32_t valueNum,
+ bool dwAligned)
+ {
+ GBE_ASSERT(valueNum < 255);
+ this->opcode = OP_STORE;
+ this->type = type;
+ this->offset = offset;
+ this->values = values;
+ this->addrSpace = addrSpace;
+ this->valueNum = valueNum;
+ this->dwAligned = dwAligned ? 1 : 0;
+ }
+ INLINE Register getSrc(const Function &fn, uint32_t ID) const {
+ GBE_ASSERTM(ID < valueNum + 1u, "Out-of-bound source register for store");
+ if (ID == 0u)
+ return offset;
+ else
+ return fn.getRegister(values, ID - 1);
+ }
+ INLINE void setSrc(Function &fn, uint32_t ID, Register reg) {
+ GBE_ASSERTM(ID < valueNum + 1u, "Out-of-bound source register for store");
+ if (ID == 0u)
+ offset = reg;
+ else
+ fn.setRegister(values, ID - 1, reg);
+ }
+ INLINE uint32_t getSrcNum(void) const { return valueNum + 1u; }
+ INLINE uint32_t getValueNum(void) const { return valueNum; }
+ INLINE Type getValueType(void) const { return type; }
+ INLINE AddressSpace getAddressSpace(void) const { return addrSpace; }
+ INLINE bool wellFormed(const Function &fn, std::string &why) const;
+ INLINE void out(std::ostream &out, const Function &fn) const;
+ INLINE bool isAligned(void) const { return !!dwAligned; }
+ Type type; //!< Type to store
+ Register offset; //!< First source is the offset where to store
+ Tuple values; //!< Values to store
+ AddressSpace addrSpace; //!< Where to store
+ uint8_t valueNum:7; //!< Number of values to store
+ uint8_t dwAligned:1; //!< DWORD aligned is what matters with GEN
+ Register dst[]; //!< No destination
+ };
+
+ class ALIGNED_INSTRUCTION SampleInstruction : // TODO
+ public BasePolicy,
+ public NDstPolicy<SampleInstruction, 0>,
+ public NSrcPolicy<SampleInstruction, 0>
+ {
+ public:
+ INLINE SampleInstruction(void) { this->opcode = OP_SAMPLE; }
+ INLINE bool wellFormed(const Function &fn, std::string &why) const;
+ INLINE void out(std::ostream &out, const Function &fn) const {
+ this->outOpcode(out);
+ out << " ... TODO";
+ }
+ Register dst[], src[];
+ };
+
+ class ALIGNED_INSTRUCTION TypedWriteInstruction : // TODO
+ public BasePolicy,
+ public NDstPolicy<TypedWriteInstruction, 0>,
+ public NSrcPolicy<TypedWriteInstruction, 0>
+ {
+ public:
+ INLINE TypedWriteInstruction(void) { this->opcode = OP_TYPED_WRITE; }
+ INLINE bool wellFormed(const Function &fn, std::string &why) const;
+ INLINE void out(std::ostream &out, const Function &fn) const {
+ this->outOpcode(out);
+ out << " ... TODO";
+ }
+ Register dst[], src[];
+ };
+
+ class ALIGNED_INSTRUCTION LoadImmInstruction :
+ public BasePolicy,
+ public NSrcPolicy<LoadImmInstruction, 0>,
+ public NDstPolicy<LoadImmInstruction, 1>
+ {
+ public:
+ INLINE LoadImmInstruction(Type type, Register dst, ImmediateIndex index)
+ {
+ this->dst[0] = dst;
+ this->opcode = OP_LOADI;
+ this->immediateIndex = index;
+ this->type = type;
+ }
+ INLINE Immediate getImmediate(const Function &fn) const {
+ return fn.getImmediate(immediateIndex);
+ }
+ INLINE Type getType(void) const { return this->type; }
+ bool wellFormed(const Function &fn, std::string &why) const;
+ INLINE void out(std::ostream &out, const Function &fn) const;
+ Register dst[1]; //!< RegisterData to store into
+ Register src[]; //!< No source register
+ ImmediateIndex immediateIndex; //!< Index in the vector of immediates
+ Type type; //!< Type of the immediate
+ };
+
+ class ALIGNED_INSTRUCTION SyncInstruction :
+ public BasePolicy,
+ public NSrcPolicy<SyncInstruction, 0>,
+ public NDstPolicy<SyncInstruction, 0>
+ {
+ public:
+ INLINE SyncInstruction(uint32_t parameters) {
+ this->opcode = OP_SYNC;
+ this->parameters = parameters;
+ }
+ INLINE uint32_t getParameters(void) const { return this->parameters; }
+ INLINE bool wellFormed(const Function &fn, std::string &why) const;
+ INLINE void out(std::ostream &out, const Function &fn) const;
+ uint32_t parameters;
+ Register dst[], src[];
+ };
+
+ class ALIGNED_INSTRUCTION LabelInstruction :
+ public BasePolicy,
+ public NSrcPolicy<LabelInstruction, 0>,
+ public NDstPolicy<LabelInstruction, 0>
+ {
+ public:
+ INLINE LabelInstruction(LabelIndex labelIndex) {
+ this->opcode = OP_LABEL;
+ this->labelIndex = labelIndex;
+ }
+ INLINE LabelIndex getLabelIndex(void) const { return labelIndex; }
+ INLINE bool wellFormed(const Function &fn, std::string &why) const;
+ INLINE void out(std::ostream &out, const Function &fn) const;
+ LabelIndex labelIndex; //!< Index of the label
+ Register dst[], src[];
+ };
+
+#undef ALIGNED_INSTRUCTION
+
+ /////////////////////////////////////////////////////////////////////////
+ // Implements all the wellFormed methods
+ /////////////////////////////////////////////////////////////////////////
+
+ /*! All Nary instruction registers must be of the same family and properly
+ * defined (i.e. not out-of-bound)
+ */
+ static INLINE bool checkRegisterData(RegisterFamily family,
+ const Register &ID,
+ const Function &fn,
+ std::string &whyNot)
+ {
+ if (UNLIKELY(uint16_t(ID) >= fn.regNum())) {
+ whyNot = "Out-of-bound destination register index";
+ return false;
+ }
+ const RegisterData reg = fn.getRegisterData(ID);
+ if (UNLIKELY(reg.family != family)) {
+ whyNot = "Destination family does not match instruction type";
+ return false;
+ }
+ return true;
+ }
+
+ /*! Special registers are *not* writeable */
+ static INLINE bool checkSpecialRegForWrite(const Register ®,
+ const Function &fn,
+ std::string &whyNot)
+ {
+ if (fn.isSpecialReg(reg) == true && reg != ir::ocl::stackptr) {
+ whyNot = "Non stack pointer special registers are not writeable";
+ return false;
+ }
+ return true;
+ }
+
+ /*! We check that the given type belongs to the provided type family */
+ static INLINE bool checkTypeFamily(const Type &type,
+ const Type *family,
+ uint32_t typeNum,
+ std::string &whyNot)
+ {
+ uint32_t typeID = 0;
+ for (; typeID < typeNum; ++typeID)
+ if (family[typeID] == type)
+ break;
+ if (typeID == typeNum) {
+ whyNot = "Type is not supported by the instruction";
+ return false;
+ }
+ return true;
+ }
+
+#define CHECK_TYPE(TYPE, FAMILY) \
+ do { \
+ if (UNLIKELY(checkTypeFamily(TYPE, FAMILY, FAMILY##Num, whyNot)) == false) \
+ return false; \
+ } while (0)
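+
+ /* Illustrative expansion (not upstream code): a use such as
+ * CHECK_TYPE(this->type, allButBool)
+ * becomes roughly
+ * if (UNLIKELY(checkTypeFamily(this->type, allButBool, allButBoolNum, whyNot)) == false)
+ * return false;
+ * so the surrounding wellFormed() method bails out with whyNot filled in.
+ */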
+
+ static const Type madType[] = {TYPE_FLOAT};
+ static const uint32_t madTypeNum = ARRAY_ELEM_NUM(madType);
+
+ // TODO add support for 64 bits values
+ static const Type allButBool[] = {TYPE_S8, TYPE_U8,
+ TYPE_S16, TYPE_U16,
+ TYPE_S32, TYPE_U32,
+ TYPE_FLOAT, TYPE_DOUBLE};
+ static const uint32_t allButBoolNum = ARRAY_ELEM_NUM(allButBool);
+
+ // TODO add support for 64 bits values
+ static const Type logicalType[] = {TYPE_S8, TYPE_U8,
+ TYPE_S16, TYPE_U16,
+ TYPE_S32, TYPE_U32,
+ TYPE_BOOL};
+ static const uint32_t logicalTypeNum = ARRAY_ELEM_NUM(logicalType);
+
+ // Unary and binary instructions share the same rules
+ template <uint32_t srcNum>
+ INLINE bool NaryInstruction<srcNum>::wellFormed(const Function &fn, std::string &whyNot) const
+ {
+ const RegisterFamily family = getFamily(this->type);
+ if (UNLIKELY(checkSpecialRegForWrite(dst[0], fn, whyNot) == false))
+ return false;
+ if (UNLIKELY(checkRegisterData(family, dst[0], fn, whyNot) == false))
+ return false;
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID)
+ if (UNLIKELY(checkRegisterData(family, src[srcID], fn, whyNot) == false))
+ return false;
+ // We actually support logical operations on boolean values for AND, OR,
+ // and XOR
+ switch (this->opcode) {
+ case OP_OR:
+ case OP_XOR:
+ case OP_AND:
+ CHECK_TYPE(this->type, logicalType);
+ break;
+ default:
+ CHECK_TYPE(this->type, allButBool);
+ break;
+ case OP_POW:
+ case OP_COS:
+ case OP_SIN:
+ case OP_RCP:
+ case OP_ABS:
+ case OP_RSQ:
+ case OP_SQR:
+ case OP_RNDD:
+ case OP_RNDE:
+ case OP_RNDU:
+ case OP_RNDZ:
+ const Type fp = TYPE_FLOAT;
+ if (UNLIKELY(checkTypeFamily(TYPE_FLOAT, &fp, 1, whyNot)) == false)
+ return false;
+ break;
+ }
+ return true;
+ }
+
+ // The first source must be a boolean. The others must match the destination type
+ INLINE bool SelectInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+ {
+ const RegisterFamily family = getFamily(this->type);
+ if (UNLIKELY(checkSpecialRegForWrite(dst[0], fn, whyNot) == false))
+ return false;
+ if (UNLIKELY(checkRegisterData(family, dst[0], fn, whyNot) == false))
+ return false;
+ if (UNLIKELY(src + 3u > fn.tupleNum())) {
+ whyNot = "Out-of-bound index for ternary instruction";
+ return false;
+ }
+ const Register regID = fn.getRegister(src, 0);
+ if (UNLIKELY(checkRegisterData(FAMILY_BOOL, regID, fn, whyNot) == false))
+ return false;
+ for (uint32_t srcID = 1; srcID < 3; ++srcID) {
+ const Register regID = fn.getRegister(src, srcID);
+ if (UNLIKELY(checkRegisterData(family, regID, fn, whyNot) == false))
+ return false;
+ }
+ CHECK_TYPE(this->type, allButBool);
+ return true;
+ }
+
+ // Pretty similar to binary instruction. Only the destination is of type
+ // boolean
+ INLINE bool CompareInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+ {
+ if (UNLIKELY(checkSpecialRegForWrite(dst[0], fn, whyNot) == false))
+ return false;
+ if (UNLIKELY(checkRegisterData(FAMILY_BOOL, dst[0], fn, whyNot) == false))
+ return false;
+ const RegisterFamily family = getFamily(this->type);
+ for (uint32_t srcID = 0; srcID < 2; ++srcID)
+ if (UNLIKELY(checkRegisterData(family, src[srcID], fn, whyNot) == false))
+ return false;
+ CHECK_TYPE(this->type, allButBool);
+ return true;
+ }
+
+ // We can convert anything to anything, but register families must match the declared types
+ INLINE bool ConvertInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+ {
+ const RegisterFamily dstFamily = getFamily(dstType);
+ const RegisterFamily srcFamily = getFamily(srcType);
+ if (UNLIKELY(checkSpecialRegForWrite(dst[0], fn, whyNot) == false))
+ return false;
+ if (UNLIKELY(checkRegisterData(dstFamily, dst[0], fn, whyNot) == false))
+ return false;
+ if (UNLIKELY(checkRegisterData(srcFamily, src[0], fn, whyNot) == false))
+ return false;
+ CHECK_TYPE(this->dstType, allButBool);
+ CHECK_TYPE(this->srcType, allButBool);
+ return true;
+ }
+
+ /*! Loads and stores follow the same restrictions */
+ template <typename T>
+ INLINE bool wellFormedLoadStore(const T &insn, const Function &fn, std::string &whyNot)
+ {
+ if (UNLIKELY(insn.offset >= fn.regNum())) {
+ whyNot = "Out-of-bound offset register index";
+ return false;
+ }
+ if (UNLIKELY(insn.values + insn.valueNum > fn.tupleNum())) {
+ whyNot = "Out-of-bound tuple index";
+ return false;
+ }
+ // Check all registers
+ const RegisterFamily family = getFamily(insn.type);
+ for (uint32_t valueID = 0; valueID < insn.valueNum; ++valueID) {
+ const Register regID = fn.getRegister(insn.values, valueID);
+ if (UNLIKELY(checkRegisterData(family, regID, fn, whyNot) == false))
+ return false;
+ }
+ CHECK_TYPE(insn.type, allButBool);
+ return true;
+ }
+
+ INLINE bool LoadInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+ {
+ const uint32_t dstNum = this->getDstNum();
+ for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
+ const Register reg = this->getDst(fn, dstID);
+ const bool isOK = checkSpecialRegForWrite(reg, fn, whyNot);
+ if (UNLIKELY(isOK == false)) return false;
+ }
+ if (UNLIKELY(dstNum > Instruction::MAX_DST_NUM)) {
+ whyNot = "Too many destinations for load instruction";
+ return false;
+ }
+ return wellFormedLoadStore(*this, fn, whyNot);
+ }
+
+ INLINE bool StoreInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+ {
+ const uint32_t srcNum = this->getSrcNum();
+ if (UNLIKELY(srcNum > Instruction::MAX_SRC_NUM)) {
+ whyNot = "Too many source for store instruction";
+ return false;
+ }
+ return wellFormedLoadStore(*this, fn, whyNot);
+ }
+
+ // TODO
+ INLINE bool SampleInstruction::wellFormed(const Function &fn, std::string &why) const
+ { return true; }
+ INLINE bool TypedWriteInstruction::wellFormed(const Function &fn, std::string &why) const
+ { return true; }
+
+ // Ensure that types and register family match
+ INLINE bool LoadImmInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+ {
+ if (UNLIKELY(immediateIndex >= fn.immediateNum())) {
+ whyNot = "Out-of-bound immediate value index";
+ return false;
+ }
+ const ir::Type immType = fn.getImmediate(immediateIndex).type;
+ if (UNLIKELY(type != immType)) {
+ whyNot = "Inconsistant type for the immediate value to load";
+ return false;
+ }
+ const RegisterFamily family = getFamily(type);
+ if (UNLIKELY(checkSpecialRegForWrite(dst[0], fn, whyNot) == false))
+ return false;
+ if (UNLIKELY(checkRegisterData(family, dst[0], fn, whyNot) == false))
+ return false;
+ CHECK_TYPE(this->type, allButBool);
+ return true;
+ }
+
+ INLINE bool SyncInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+ {
+ const uint32_t maxParams = SYNC_WORKGROUP_EXEC |
+ SYNC_LOCAL_READ_FENCE |
+ SYNC_LOCAL_WRITE_FENCE |
+ SYNC_GLOBAL_READ_FENCE |
+ SYNC_GLOBAL_WRITE_FENCE;
+ if (UNLIKELY(this->parameters > maxParams)) {
+ whyNot = "Invalid parameters for sync instruction";
+ return false;
+ } else if (UNLIKELY(this->parameters == 0)) {
+ whyNot = "Missing parameters for sync instruction";
+ return false;
+ }
+ return true;
+ }
+
+ // Only a label index is required
+ INLINE bool LabelInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+ {
+ if (UNLIKELY(labelIndex >= fn.labelNum())) {
+ whyNot = "Out-of-bound label index";
+ return false;
+ }
+ return true;
+ }
+
+ // The label must exist and the register must be of boolean family
+ INLINE bool BranchInstruction::wellFormed(const Function &fn, std::string &whyNot) const {
+ if (hasLabel)
+ if (UNLIKELY(labelIndex >= fn.labelNum())) {
+ whyNot = "Out-of-bound label index";
+ return false;
+ }
+ if (hasPredicate)
+ if (UNLIKELY(checkRegisterData(FAMILY_BOOL, predicate, fn, whyNot) == false))
+ return false;
+ return true;
+ }
+
+#undef CHECK_TYPE
+
+ /////////////////////////////////////////////////////////////////////////
+ // Implements all the output stream methods
+ /////////////////////////////////////////////////////////////////////////
+ template <uint32_t srcNum>
+ INLINE void NaryInstruction<srcNum>::out(std::ostream &out, const Function &fn) const {
+ this->outOpcode(out);
+ out << "." << this->getType()
+ << " %" << this->getDst(fn, 0);
+ for (uint32_t i = 0; i < srcNum; ++i)
+ out << " %" << this->getSrc(fn, i);
+ }
+
+ template <typename T>
+ static void ternaryOrSelectOut(const T &insn, std::ostream &out, const Function &fn) {
+ insn.outOpcode(out);
+ out << "." << insn.getType()
+ << " %" << insn.getDst(fn, 0)
+ << " %" << insn.getSrc(fn, 0)
+ << " %" << insn.getSrc(fn, 1)
+ << " %" << insn.getSrc(fn, 2);
+ }
+
+ INLINE void SelectInstruction::out(std::ostream &out, const Function &fn) const {
+ ternaryOrSelectOut(*this, out, fn);
+ }
+
+ INLINE void ConvertInstruction::out(std::ostream &out, const Function &fn) const {
+ this->outOpcode(out);
+ out << "." << this->getDstType()
+ << "." << this->getSrcType()
+ << " %" << this->getDst(fn, 0)
+ << " %" << this->getSrc(fn, 0);
+ }
+
+ INLINE void LoadInstruction::out(std::ostream &out, const Function &fn) const {
+ this->outOpcode(out);
+ out << "." << type << "." << addrSpace << (dwAligned ? "." : ".un") << "aligned";
+ out << " {";
+ for (uint32_t i = 0; i < valueNum; ++i)
+ out << "%" << this->getDst(fn, i) << (i != (valueNum-1u) ? " " : "");
+ out << "}";
+ out << " %" << this->getSrc(fn, 0);
+ }
+
+ INLINE void StoreInstruction::out(std::ostream &out, const Function &fn) const {
+ this->outOpcode(out);
+ out << "." << type << "." << addrSpace << (dwAligned ? "." : ".un") << "aligned";
+ out << " %" << this->getSrc(fn, 0) << " {";
+ for (uint32_t i = 0; i < valueNum; ++i)
+ out << "%" << this->getSrc(fn, i+1) << (i != (valueNum-1u) ? " " : "");
+ out << "}";
+ }
+
+ INLINE void LabelInstruction::out(std::ostream &out, const Function &fn) const {
+ this->outOpcode(out);
+ out << " $" << labelIndex;
+ }
+
+ INLINE void BranchInstruction::out(std::ostream &out, const Function &fn) const {
+ this->outOpcode(out);
+ if (hasPredicate)
+ out << "<%" << this->getSrc(fn, 0) << ">";
+ if (hasLabel) out << " -> label$" << labelIndex;
+ }
+
+ INLINE void LoadImmInstruction::out(std::ostream &out, const Function &fn) const {
+ this->outOpcode(out);
+ out << "." << type;
+ out << " %" << this->getDst(fn,0) << " ";
+ fn.outImmediate(out, immediateIndex);
+ }
+
+ static const char *syncStr[syncFieldNum] = {
+ "workgroup", "local_read", "local_write", "global_read", "global_write"
+ };
+
+ INLINE void SyncInstruction::out(std::ostream &out, const Function &fn) const {
+ this->outOpcode(out);
+ for (uint32_t field = 0; field < syncFieldNum; ++field)
+ if (this->parameters & (1 << field))
+ out << "." << syncStr[field];
+ }
+
+
+ } /* namespace internal */
+
+ std::ostream &operator<< (std::ostream &out, AddressSpace addrSpace) {
+ switch (addrSpace) {
+ case MEM_GLOBAL: return out << "global";
+ case MEM_LOCAL: return out << "local";
+ case MEM_CONSTANT: return out << "constant";
+ case MEM_PRIVATE: return out << "private";
+ case MEM_INVALID: NOT_SUPPORTED; return out;
+ };
+ return out;
+ }
+
+ ///////////////////////////////////////////////////////////////////////////
+ // Implements the various introspection functions
+ ///////////////////////////////////////////////////////////////////////////
+ template <typename T, typename U> struct HelperIntrospection {
+ enum { value = 0 };
+ };
+ template <typename T> struct HelperIntrospection<T,T> {
+ enum { value = 1 };
+ };
+
+ RegisterData Instruction::getDstData(uint32_t ID) const {
+ const Function &fn = this->getFunction();
+ return fn.getRegisterData(this->getDst(ID));
+ }
+ RegisterData Instruction::getSrcData(uint32_t ID) const {
+ const Function &fn = this->getFunction();
+ return fn.getRegisterData(this->getSrc(ID));
+ }
+
+#define DECL_INSN(OPCODE, CLASS) \
+ case OP_##OPCODE: \
+ return HelperIntrospection<CLASS, RefClass>::value == 1;
+
+#define START_INTROSPECTION(CLASS) \
+ static_assert(sizeof(internal::CLASS) == sizeof(uint64_t), \
+ "Bad instruction size"); \
+ static_assert(offsetof(internal::CLASS, opcode) == 0, \
+ "Bad opcode offset"); \
+ bool CLASS::isClassOf(const Instruction &insn) { \
+ const Opcode op = insn.getOpcode(); \
+ typedef CLASS RefClass; \
+ switch (op) {
+
+#define END_INTROSPECTION(CLASS) \
+ default: return false; \
+ }; \
+ }
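+
+ // Illustrative expansion (not upstream code; it assumes instruction.hxx maps,
+ // e.g., MOV to UnaryInstruction and ADD to BinaryInstruction). The macros above
+ // turn START_INTROSPECTION(UnaryInstruction) ... END_INTROSPECTION(UnaryInstruction)
+ // into roughly:
+ // bool UnaryInstruction::isClassOf(const Instruction &insn) {
+ // switch (insn.getOpcode()) {
+ // case OP_MOV: return true; // HelperIntrospection<UnaryInstruction, UnaryInstruction>::value == 1
+ // case OP_ADD: return false; // HelperIntrospection<BinaryInstruction, UnaryInstruction>::value == 0
+ // /* ... one case per opcode ... */
+ // default: return false;
+ // };
+ // }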
+
+START_INTROSPECTION(UnaryInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(UnaryInstruction)
+
+START_INTROSPECTION(BinaryInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(BinaryInstruction)
+
+START_INTROSPECTION(CompareInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(CompareInstruction)
+
+START_INTROSPECTION(ConvertInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(ConvertInstruction)
+
+START_INTROSPECTION(SelectInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(SelectInstruction)
+
+START_INTROSPECTION(BranchInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(BranchInstruction)
+
+START_INTROSPECTION(SampleInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(SampleInstruction)
+
+START_INTROSPECTION(TypedWriteInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(TypedWriteInstruction)
+
+START_INTROSPECTION(LoadImmInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(LoadImmInstruction)
+
+START_INTROSPECTION(LoadInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(LoadInstruction)
+
+START_INTROSPECTION(StoreInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(StoreInstruction)
+
+START_INTROSPECTION(SyncInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(SyncInstruction)
+
+START_INTROSPECTION(LabelInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(LabelInstruction)
+
+#undef END_INTROSPECTION
+#undef START_INTROSPECTION
+#undef DECL_INSN
+
+ ///////////////////////////////////////////////////////////////////////////
+ // Implements the function dispatching from public to internal with some
+ // macro horrors
+ ///////////////////////////////////////////////////////////////////////////
+
+#define DECL_INSN(OPCODE, CLASS) \
+ case OP_##OPCODE: return reinterpret_cast<const internal::CLASS*>(this)->CALL;
+
+#define START_FUNCTION(CLASS, RET, PROTOTYPE) \
+ RET CLASS::PROTOTYPE const { \
+ const Opcode op = this->getOpcode(); \
+ switch (op) {
+
+#define END_FUNCTION(CLASS, RET) \
+ case OP_INVALID: return RET(); \
+ }; \
+ return RET(); \
+ }
+
+#define CALL getSrcNum()
+START_FUNCTION(Instruction, uint32_t, getSrcNum(void))
+#include "ir/instruction.hxx"
+END_FUNCTION(Instruction, uint32_t)
+#undef CALL
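+
+ // Illustrative expansion (not upstream code; again assuming instruction.hxx maps
+ // MOV to UnaryInstruction): with CALL defined as getSrcNum(), the
+ // START_FUNCTION / #include / END_FUNCTION triplet above becomes roughly:
+ // uint32_t Instruction::getSrcNum(void) const {
+ // switch (this->getOpcode()) {
+ // case OP_MOV: return reinterpret_cast<const internal::UnaryInstruction*>(this)->getSrcNum();
+ // // ... one case per opcode listed in instruction.hxx ...
+ // case OP_INVALID: return uint32_t();
+ // };
+ // return uint32_t();
+ // }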
+
+#define CALL getDstNum()
+START_FUNCTION(Instruction, uint32_t, getDstNum(void))
+#include "ir/instruction.hxx"
+END_FUNCTION(Instruction, uint32_t)
+#undef CALL
+
+#undef DECL_INSN
+
+#define DECL_INSN(OPCODE, CLASS) \
+ case OP_##OPCODE: \
+ { \
+ const Function &fn = this->getFunction(); \
+ return reinterpret_cast<const internal::CLASS*>(this)->CALL; \
+ }
+
+#define CALL wellFormed(fn, whyNot)
+START_FUNCTION(Instruction, bool, wellFormed(std::string &whyNot))
+#include "ir/instruction.hxx"
+END_FUNCTION(Instruction, bool)
+#undef CALL
+
+#define CALL getDst(fn, ID)
+START_FUNCTION(Instruction, Register, getDst(uint32_t ID))
+#include "ir/instruction.hxx"
+END_FUNCTION(Instruction, Register)
+#undef CALL
+
+#define CALL getSrc(fn, ID)
+START_FUNCTION(Instruction, Register, getSrc(uint32_t ID))
+#include "ir/instruction.hxx"
+END_FUNCTION(Instruction, Register)
+#undef CALL
+
+#undef DECL_INSN
+#undef END_FUNCTION
+#undef START_FUNCTION
+
+ void Instruction::setSrc(uint32_t srcID, Register reg) {
+ Function &fn = this->getFunction();
+#if GBE_DEBUG
+ const RegisterData oldData = this->getSrcData(srcID);
+ const RegisterData newData = fn.getRegisterData(reg);
+ GBE_ASSERT(oldData.family == newData.family);
+#endif /* GBE_DEBUG */
+ const Opcode op = this->getOpcode();
+ switch (op) {
+#define DECL_INSN(OP, FAMILY)\
+ case OP_##OP:\
+ reinterpret_cast<internal::FAMILY*>(this)->setSrc(fn, srcID, reg);\
+ break;
+#include "instruction.hxx"
+#undef DECL_INSN
+ case OP_INVALID: NOT_SUPPORTED; break;
+ };
+ }
+
+ void Instruction::setDst(uint32_t dstID, Register reg) {
+ Function &fn = this->getFunction();
+#if GBE_DEBUG
+ const RegisterData oldData = this->getDstData(dstID);
+ const RegisterData newData = fn.getRegisterData(reg);
+ GBE_ASSERT(oldData.family == newData.family);
+#endif /* GBE_DEBUG */
+ const Opcode op = this->getOpcode();
+ switch (op) {
+#define DECL_INSN(OP, FAMILY)\
+ case OP_##OP:\
+ reinterpret_cast<internal::FAMILY*>(this)->setDst(fn, dstID, reg);\
+ break;
+#include "instruction.hxx"
+#undef DECL_INSN
+ case OP_INVALID: NOT_SUPPORTED; break;
+ };
+ }
+
+ const Function &Instruction::getFunction(void) const {
+ const BasicBlock *bb = this->getParent();
+ GBE_ASSERT(bb != NULL);
+ return bb->getParent();
+ }
+ Function &Instruction::getFunction(void) {
+ BasicBlock *bb = this->getParent();
+ GBE_ASSERT(bb != NULL);
+ return bb->getParent();
+ }
+
+ void Instruction::replace(Instruction *other) const {
+ Function &fn = other->getFunction();
+ Instruction *insn = fn.newInstruction(*this);
+ intrusive_list_node *prev = other->prev;
+ insn->parent = other->parent;
+ other->remove();
+ append(insn, prev);
+ }
+
+ void Instruction::remove(void) {
+ Function &fn = this->getFunction();
+ unlink(this);
+ fn.deleteInstruction(this);
+ }
+
+ bool Instruction::hasSideEffect(void) const {
+ return opcode == OP_STORE ||
+ opcode == OP_TYPED_WRITE ||
+ opcode == OP_SYNC;
+ }
+
+#define DECL_MEM_FN(CLASS, RET, PROTOTYPE, CALL) \
+ RET CLASS::PROTOTYPE const { \
+ return reinterpret_cast<const internal::CLASS*>(this)->CALL; \
+ }
+
+DECL_MEM_FN(UnaryInstruction, Type, getType(void), getType())
+DECL_MEM_FN(BinaryInstruction, Type, getType(void), getType())
+DECL_MEM_FN(BinaryInstruction, bool, commutes(void), commutes())
+DECL_MEM_FN(SelectInstruction, Type, getType(void), getType())
+DECL_MEM_FN(CompareInstruction, Type, getType(void), getType())
+DECL_MEM_FN(ConvertInstruction, Type, getSrcType(void), getSrcType())
+DECL_MEM_FN(ConvertInstruction, Type, getDstType(void), getDstType())
+DECL_MEM_FN(StoreInstruction, Type, getValueType(void), getValueType())
+DECL_MEM_FN(StoreInstruction, uint32_t, getValueNum(void), getValueNum())
+DECL_MEM_FN(StoreInstruction, AddressSpace, getAddressSpace(void), getAddressSpace())
+DECL_MEM_FN(StoreInstruction, bool, isAligned(void), isAligned())
+DECL_MEM_FN(LoadInstruction, Type, getValueType(void), getValueType())
+DECL_MEM_FN(LoadInstruction, uint32_t, getValueNum(void), getValueNum())
+DECL_MEM_FN(LoadInstruction, AddressSpace, getAddressSpace(void), getAddressSpace())
+DECL_MEM_FN(LoadInstruction, bool, isAligned(void), isAligned())
+DECL_MEM_FN(LoadImmInstruction, Type, getType(void), getType())
+DECL_MEM_FN(LabelInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())
+DECL_MEM_FN(BranchInstruction, bool, isPredicated(void), isPredicated())
+DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())
+DECL_MEM_FN(SyncInstruction, uint32_t, getParameters(void), getParameters())
+
+#undef DECL_MEM_FN
+
+ Immediate LoadImmInstruction::getImmediate(void) const {
+ const Function &fn = this->getFunction();
+ return reinterpret_cast<const internal::LoadImmInstruction*>(this)->getImmediate(fn);
+ }
+
+ ///////////////////////////////////////////////////////////////////////////
+ // Implements the emission functions
+ ///////////////////////////////////////////////////////////////////////////
+
+ // For all unary functions with given opcode
+ Instruction ALU1(Opcode opcode, Type type, Register dst, Register src) {
+ return internal::UnaryInstruction(opcode, type, dst, src).convert();
+ }
+
+ // All unary functions
+#define DECL_EMIT_FUNCTION(NAME) \
+ Instruction NAME(Type type, Register dst, Register src) { \
+ return ALU1(OP_##NAME, type, dst, src);\
+ }
+
+ DECL_EMIT_FUNCTION(MOV)
+ DECL_EMIT_FUNCTION(COS)
+ DECL_EMIT_FUNCTION(SIN)
+ DECL_EMIT_FUNCTION(LOG)
+ DECL_EMIT_FUNCTION(SQR)
+ DECL_EMIT_FUNCTION(RSQ)
+ DECL_EMIT_FUNCTION(RNDD)
+ DECL_EMIT_FUNCTION(RNDE)
+ DECL_EMIT_FUNCTION(RNDU)
+ DECL_EMIT_FUNCTION(RNDZ)
+
+#undef DECL_EMIT_FUNCTION
+
+ // All binary functions
+#define DECL_EMIT_FUNCTION(NAME) \
+ Instruction NAME(Type type, Register dst, Register src0, Register src1) { \
+ return internal::BinaryInstruction(OP_##NAME, type, dst, src0, src1).convert(); \
+ }
+
+ DECL_EMIT_FUNCTION(POW)
+ DECL_EMIT_FUNCTION(MUL)
+ DECL_EMIT_FUNCTION(ADD)
+ DECL_EMIT_FUNCTION(SUB)
+ DECL_EMIT_FUNCTION(DIV)
+ DECL_EMIT_FUNCTION(REM)
+ DECL_EMIT_FUNCTION(SHL)
+ DECL_EMIT_FUNCTION(SHR)
+ DECL_EMIT_FUNCTION(ASR)
+ DECL_EMIT_FUNCTION(BSF)
+ DECL_EMIT_FUNCTION(BSB)
+ DECL_EMIT_FUNCTION(OR)
+ DECL_EMIT_FUNCTION(XOR)
+ DECL_EMIT_FUNCTION(AND)
+
+#undef DECL_EMIT_FUNCTION
+
+ // SEL
+ Instruction SEL(Type type, Register dst, Tuple src) {
+ return internal::SelectInstruction(type, dst, src).convert();
+ }
+
+ // All compare functions
+#define DECL_EMIT_FUNCTION(NAME) \
+ Instruction NAME(Type type, Register dst, Register src0, Register src1) { \
+ const internal::CompareInstruction insn(OP_##NAME, type, dst, src0, src1); \
+ return insn.convert(); \
+ }
+
+ DECL_EMIT_FUNCTION(EQ)
+ DECL_EMIT_FUNCTION(NE)
+ DECL_EMIT_FUNCTION(LE)
+ DECL_EMIT_FUNCTION(LT)
+ DECL_EMIT_FUNCTION(GE)
+ DECL_EMIT_FUNCTION(GT)
+
+#undef DECL_EMIT_FUNCTION
+
+ // CVT
+ Instruction CVT(Type dstType, Type srcType, Register dst, Register src) {
+ return internal::ConvertInstruction(dstType, srcType, dst, src).convert();
+ }
+
+ // BRA
+ Instruction BRA(LabelIndex labelIndex) {
+ return internal::BranchInstruction(OP_BRA, labelIndex).convert();
+ }
+ Instruction BRA(LabelIndex labelIndex, Register pred) {
+ return internal::BranchInstruction(OP_BRA, labelIndex, pred).convert();
+ }
+
+ // RET
+ Instruction RET(void) {
+ return internal::BranchInstruction(OP_RET).convert();
+ }
+
+ // LOADI
+ Instruction LOADI(Type type, Register dst, ImmediateIndex value) {
+ return internal::LoadImmInstruction(type, dst, value).convert();
+ }
+
+ // LOAD and STORE
+#define DECL_EMIT_FUNCTION(NAME, CLASS) \
+ Instruction NAME(Type type, \
+ Tuple tuple, \
+ Register offset, \
+ AddressSpace space, \
+ uint32_t valueNum, \
+ bool dwAligned) \
+ { \
+ return internal::CLASS(type,tuple,offset,space,valueNum,dwAligned).convert(); \
+ }
+
+ DECL_EMIT_FUNCTION(LOAD, LoadInstruction)
+ DECL_EMIT_FUNCTION(STORE, StoreInstruction)
+
+#undef DECL_EMIT_FUNCTION
+
+ // SYNC (memory fence / barrier)
+ Instruction SYNC(uint32_t parameters) {
+ return internal::SyncInstruction(parameters).convert();
+ }
+
+ // LABEL
+ Instruction LABEL(LabelIndex labelIndex) {
+ return internal::LabelInstruction(labelIndex).convert();
+ }
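+
+ // A minimal usage sketch of the emission helpers above (illustrative only; the
+ // registers, destination and label below are assumed to come from an existing
+ // Function, e.g. via newRegister() and newLabel()):
+ // const Instruction mov = MOV(TYPE_FLOAT, dst, src); // dst = src
+ // const Instruction sum = ADD(TYPE_FLOAT, dst, src0, src1); // dst = src0 + src1
+ // const Instruction jmp = BRA(target, pred); // predicated branch
+ // Each helper builds a value-typed Instruction; appending it to a basic block
+ // is left to the IR context / function code.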
+
+ std::ostream &operator<< (std::ostream &out, const Instruction &insn) {
+ const Function &fn = insn.getFunction();
+ switch (insn.getOpcode()) {
+#define DECL_INSN(OPCODE, CLASS) \
+ case OP_##OPCODE: \
+ reinterpret_cast<const internal::CLASS&>(insn).out(out, fn); \
+ break;
+#include "instruction.hxx"
+#undef DECL_INSN
+ case OP_INVALID: NOT_SUPPORTED; break;
+ };
+ return out;
+ }
+
+} /* namespace ir */
+} /* namespace gbe */
+
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
new file mode 100644
index 0000000..e82e0f8
--- /dev/null
+++ b/backend/src/ir/instruction.hpp
@@ -0,0 +1,521 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file instruction.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#ifndef __GBE_IR_INSTRUCTION_HPP__
+#define __GBE_IR_INSTRUCTION_HPP__
+
+#include "ir/register.hpp"
+#include "ir/immediate.hpp"
+#include "ir/type.hpp"
+#include "sys/platform.hpp"
+#include "sys/intrusive_list.hpp"
+
+#include <ostream>
+
+namespace gbe {
+namespace ir {
+
+ /*! All opcodes */
+ enum Opcode : uint8_t {
+#define DECL_INSN(INSN, FAMILY) OP_##INSN,
+#include "ir/instruction.hxx"
+#undef DECL_INSN
+ OP_INVALID
+ };
+
+ /*! Different memory spaces */
+ enum AddressSpace : uint8_t {
+ MEM_GLOBAL = 0, //!< Global memory (a la OCL)
+ MEM_LOCAL, //!< Local memory (thread group memory)
+ MEM_CONSTANT, //!< Immutable global memory
+ MEM_PRIVATE, //!< Per thread private memory
+ MEM_INVALID
+ };
+
+ /* Vote function per hardware thread */
+ enum VotePredicate : uint8_t {
+ VOTE_ALL = 0,
+ VOTE_ANY
+ };
+
+ /*! Output the memory space */
+ std::ostream &operator<< (std::ostream &out, AddressSpace addrSpace);
+
+ /*! A label is identified with an unsigned short */
+ TYPE_SAFE(LabelIndex, uint16_t)
+
+ /*! The Function class contains the register file and the register tuples. Any
+ * information related to the registers may therefore require a function
+ */
+ class Function;
+
+ /*! Contains the stream of instructions */
+ class BasicBlock;
+
+ ///////////////////////////////////////////////////////////////////////////
+ /// All public instruction classes as manipulated by all public classes
+ ///////////////////////////////////////////////////////////////////////////
+
+ /*! Stores instruction internal data and opcode */
+ class ALIGNED(sizeof(uint64_t)) InstructionBase
+ {
+ public:
+ /*! Initialize the instruction from an 8-byte stream */
+ INLINE InstructionBase(const char *stream) {
+ opcode = Opcode(stream[0]);
+ for (uint32_t byte = 0; byte < opaqueSize; ++byte)
+ opaque[byte] = stream[byte+1];
+ }
+ /*! Uninitialized instruction */
+ INLINE InstructionBase(void) {}
+ /*! Get the instruction opcode */
+ INLINE Opcode getOpcode(void) const { return opcode; }
+ protected:
+ enum { opaqueSize = sizeof(uint64_t)-sizeof(uint8_t) };
+ Opcode opcode; //!< Identifies the instruction
+ char opaque[opaqueSize]; //!< Remainder of it
+ GBE_CLASS(InstructionBase); //!< Use internal allocators
+ };
+
+ /*! Store the instruction description in 32 bytes */
+ class Instruction : public InstructionBase, public intrusive_list_node
+ {
+ public:
+ /*! Initialize the instruction from an 8-byte stream */
+ INLINE Instruction(const char *stream) : InstructionBase(stream) {
+ parent = NULL;
+ }
+ /*! Copy the private fields and give it the same parent */
+ INLINE Instruction(const Instruction &other) :
+ Instruction(reinterpret_cast<const char*>(&other.opcode))
+ {}
+ private:
+ /*! To be consistent with the copy constructor */
+ INLINE Instruction &operator= (const Instruction &other) { return *this; }
+ public:
+ /*! Nothing to do here */
+ INLINE ~Instruction(void) {}
+ /*! Uninitialized instruction */
+ INLINE Instruction(void) {}
+ /*! Get the number of sources for this instruction */
+ uint32_t getSrcNum(void) const;
+ /*! Get the number of destinations for this instruction */
+ uint32_t getDstNum(void) const;
+ /*! Get the register index of the given source */
+ Register getSrc(uint32_t ID = 0u) const;
+ /*! Get the register index of the given destination */
+ Register getDst(uint32_t ID = 0u) const;
+ /*! Get the register data of the given destination */
+ RegisterData getDstData(uint32_t ID = 0u) const;
+ /*! Get the register data of the given source */
+ RegisterData getSrcData(uint32_t ID = 0u) const;
+ /*! Set a register in src srcID */
+ void setSrc(uint32_t srcID, Register reg);
+ /*! Set a register in dst dstID */
+ void setDst(uint32_t dstID, Register reg);
+ /*! Is there any side effect in the memory sub-system? */
+ bool hasSideEffect(void) const;
+ /*! Get / set the parent basic block */
+ BasicBlock *getParent(void) { return parent; }
+ const BasicBlock *getParent(void) const { return parent; }
+ void setParent(BasicBlock *block) { this->parent = block; }
+ /*! Get the function from the parent basic block */
+ const Function &getFunction(void) const;
+ Function &getFunction(void);
+ /*! Check that the instruction is well formed (types properly match,
+ * registers not out of bounds and so on). If not well formed, provide a reason
+ * in string why
+ */
+ bool wellFormed(std::string &why) const;
+ /*! Replace other by this instruction */
+ void replace(Instruction *other) const;
+ /*! Remove the instruction from the instruction stream */
+ void remove(void);
+ /*! Indicates if the instruction belongs to instruction type T. Typically, T
+ * can be BinaryInstruction, UnaryInstruction, LoadInstruction and so on
+ */
+ template <typename T> INLINE bool isMemberOf(void) const {
+ return T::isClassOf(*this);
+ }
+ static const uint32_t MAX_SRC_NUM = 8;
+ static const uint32_t MAX_DST_NUM = 8;
+ protected:
+ BasicBlock *parent; //!< The basic block containing the instruction
+ GBE_CLASS(Instruction); //!< Use internal allocators
+ };
+
+ /*! Output the instruction string in the given stream */
+ std::ostream &operator<< (std::ostream &out, const Instruction &proxy);
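+
+ /* A minimal sketch of the introspection API (illustrative only): given any
+ * Instruction `insn`, its concrete family can be queried without RTTI, e.g.
+ * if (insn.isMemberOf<BinaryInstruction>()) {
+ * // safe to inspect it through BinaryInstruction helpers such as commutes()
+ * }
+ */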
+
+ /*! Unary instructions are typed. dst and sources share the same type */
+ class UnaryInstruction : public Instruction {
+ public:
+ /*! Get the type manipulated by the instruction */
+ Type getType(void) const;
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
+ /*! Binary instructions are typed. dst and sources share the same type */
+ class BinaryInstruction : public Instruction {
+ public:
+ /*! Get the type manipulated by the instruction */
+ Type getType(void) const;
+ /*! Commutative instructions can allow better optimizations */
+ bool commutes(void) const;
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
+ /*! Select instructions write src0 to dst if cond is true. Otherwise, they
+ * write src1
+ */
+ class SelectInstruction : public Instruction {
+ public:
+ /*! Predicate is in slot 0, so the first source to select is in slot 1 */
+ static const uint32_t src0Index = 1;
+ /*! Second source to select is in slot 2 */
+ static const uint32_t src1Index = 2;
+ /*! Get the predicate of the selection instruction */
+ INLINE Register getPredicate(void) const { return this->getSrc(0); }
+ /*! Get the type of both sources */
+ Type getType(void) const;
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
+ /*! Compare instructions compare anything from the same type and return a
+ * boolean value
+ */
+ class CompareInstruction : public Instruction {
+ public:
+ /*! Get the type of the source registers */
+ Type getType(void) const;
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
+ /*! Conversion instruction converts from one type to another */
+ class ConvertInstruction : public Instruction {
+ public:
+ /*! Get the type of the source */
+ Type getSrcType(void) const;
+ /*! Get the type of the destination */
+ Type getDstType(void) const;
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
+ /*! Store instruction. First source is the address. Next sources are the
+ * values to store contiguously at the given address
+ */
+ class StoreInstruction : public Instruction {
+ public:
+ /*! Where the address register goes */
+ static const uint32_t addressIndex = 0;
+ /*! Return the types of the values to store */
+ Type getValueType(void) const;
+ /*! Give the number of values the instruction is storing (srcNum-1) */
+ uint32_t getValueNum(void) const;
+ /*! Address space that is manipulated here */
+ AddressSpace getAddressSpace(void) const;
+ /*! DWORD aligned means untyped read for Gen. That is what matters */
+ bool isAligned(void) const;
+ /*! Return the register that contains the addresses */
+ INLINE Register getAddress(void) const { return this->getSrc(addressIndex); }
+ /*! Return the register that contains the value valueID */
+ INLINE Register getValue(uint32_t valueID) const {
+ GBE_ASSERT(valueID < this->getValueNum());
+ return this->getSrc(valueID + 1u);
+ }
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
+ /*! Load instruction. The source is simply the address where to get the data.
+ * The multiple destinations are the contiguous values loaded at the given
+ * address
+ */
+ class LoadInstruction : public Instruction {
+ public:
+ /*! Type of the loaded values (ie type of all the destinations) */
+ Type getValueType(void) const;
+ /*! Number of values loaded (ie number of destinations) */
+ uint32_t getValueNum(void) const;
+ /*! Address space that is manipulated here */
+ AddressSpace getAddressSpace(void) const;
+ /*! DWORD aligned means untyped read for Gen. That is what matters */
+ bool isAligned(void) const;
+ /*! Return the register that contains the addresses */
+ INLINE Register getAddress(void) const { return this->getSrc(0u); }
+ /*! Return the register that contains the value valueID */
+ INLINE Register getValue(uint32_t valueID) const {
+ return this->getDst(valueID);
+ }
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
+ /*! Load immediate instruction loads a typed immediate value into the given
+ * register. Since double and uint64_t values will not fit into an
+ * instruction, the immediates themselves are stored in the function core.
+ * Unlike regular load instructions, there is only one destination
+ * possible
+ */
+ class LoadImmInstruction : public Instruction {
+ public:
+ /*! Return the value stored in the instruction */
+ Immediate getImmediate(void) const;
+ /*! Return the type of the stored value */
+ Type getType(void) const;
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
+ /*! Store data in a texture */
+ class TypedWriteInstruction : public Instruction {
+ public:
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
+ /*! Load texels from a texture */
+ class SampleInstruction : public Instruction {
+ public:
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
+ /*! Branch instruction is the unified way to branch (with or without
+ * predicate)
+ */
+ class BranchInstruction : public Instruction {
+ public:
+ /*! Indicate if the branch is predicated */
+ bool isPredicated(void) const;
+ /*! Return the predicate register (if predicated) */
+ RegisterData getPredicate(void) const {
+ GBE_ASSERTM(this->isPredicated() == true, "Branch is not predicated");
+ return this->getSrcData(0);
+ }
+ /*! Return the predicate register index (if predicated) */
+ Register getPredicateIndex(void) const {
+ GBE_ASSERTM(this->isPredicated() == true, "Branch is not predicated");
+ return this->getSrc(0);
+ }
+ /*! Return the label index pointed by the branch */
+ LabelIndex getLabelIndex(void) const;
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
+ /*! Label instructions are actually no-ops but are referenced by branches as
+ * their targets
+ */
+ class LabelInstruction : public Instruction {
+ public:
+ /*! Return the label index of the instruction */
+ LabelIndex getLabelIndex(void) const;
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
+ /*! Texture instructions are used for any texture mapping request */
+ class TextureInstruction : public Instruction {
+ public:
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
+ /*! Mapped to OpenCL (mem_fence, read_mem_fence, write_mem_fence, barrier) */
+ enum {
+ SYNC_WORKGROUP_EXEC = 1<<0,
+ SYNC_LOCAL_READ_FENCE = 1<<1,
+ SYNC_LOCAL_WRITE_FENCE = 1<<2,
+ SYNC_GLOBAL_READ_FENCE = 1<<3,
+ SYNC_GLOBAL_WRITE_FENCE = 1<<4,
+ SYNC_INVALID = 1<<5
+ };
+
+ /*! 5 bits to encode all possible synchronization capabilities */
+ static const uint32_t syncFieldNum = 5u;
+
+ /*! When barrier(CLK_LOCAL_MEM_FENCE) is issued */
+ static const uint32_t syncLocalBarrier = SYNC_WORKGROUP_EXEC |SYNC_LOCAL_WRITE_FENCE | SYNC_LOCAL_READ_FENCE;
+
+ /*! When barrier(CLK_GLOBAL_MEM_FENCE) is issued */
+ static const uint32_t syncGlobalBarrier = SYNC_WORKGROUP_EXEC | SYNC_GLOBAL_WRITE_FENCE | SYNC_GLOBAL_READ_FENCE;
+
+ /*! Sync instructions are used to order loads and stores for a given memory
+ * space and/or to serialize threads at a given point in the program
+ */
+ class SyncInstruction : public Instruction {
+ public:
+ /*! Get the parameters (bitfields) of the sync instructions (see above) */
+ uint32_t getParameters(void) const;
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
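+
+  /* Sketch of the mapping described above: barrier(CLK_LOCAL_MEM_FENCE) in
+   * OpenCL C becomes a sync instruction carrying syncLocalBarrier, e.g.
+   *   Instruction sync = SYNC(syncLocalBarrier);
+   * while barrier(CLK_GLOBAL_MEM_FENCE) becomes SYNC(syncGlobalBarrier).
+   */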
+
+ /*! Specialize the instruction. Type checking is performed based on the
+ * opcode: the pointer overloads return NULL on mismatch, the reference
+ * overloads assert
+ */
+ template <typename T>
+ INLINE T *cast(Instruction *insn) {
+ if(insn->isMemberOf<T>())
+ return reinterpret_cast<T*>(insn);
+ else
+ return NULL;
+ }
+ template <typename T>
+ INLINE const T *cast(const Instruction *insn) {
+ if(insn->isMemberOf<T>())
+ return reinterpret_cast<const T*>(insn);
+ else
+ return NULL;
+ }
+ template <typename T>
+ INLINE T &cast(Instruction &insn) {
+ GBE_ASSERTM(insn.isMemberOf<T>() == true, "Invalid instruction type");
+ return reinterpret_cast<T&>(insn);
+ }
+ template <typename T>
+ INLINE const T &cast(const Instruction &insn) {
+ GBE_ASSERTM(insn.isMemberOf<T>() == true, "Invalid instruction type");
+ return reinterpret_cast<const T&>(insn);
+ }
+
+ /*! Indicates if the given opcode belongs to the given instruction family */
+ template <typename T, typename U> struct EqualType {enum {value = false};};
+ template <typename T> struct EqualType<T,T> { enum {value = true};};
+ template <typename T>
+ INLINE bool isOpcodeFrom(Opcode op) {
+ switch (op) {
+#define DECL_INSN(OPCODE, FAMILY) \
+ case OP_##OPCODE: return EqualType<T, FAMILY>::value;
+#include "instruction.hxx"
+#undef DECL_INSN
+ default: NOT_SUPPORTED; return false;
+ }
+ }
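+
+  /* A minimal usage sketch (assuming insn is any Instruction reference):
+   *   if (insn.isMemberOf<LoadInstruction>()) {
+   *     const LoadInstruction &load = cast<LoadInstruction>(insn);
+   *     const uint32_t valueNum = load.getValueNum();
+   *   }
+   * The pointer overloads of cast<> combine the check and the down-cast in one
+   * call since they return NULL on mismatch.
+   */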
+
+ ///////////////////////////////////////////////////////////////////////////
+ /// All emission functions
+ ///////////////////////////////////////////////////////////////////////////
+
+ /*! alu1.type dst src */
+ Instruction ALU1(Opcode opcode, Type type, Register dst, Register src);
+ /*! mov.type dst src */
+ Instruction MOV(Type type, Register dst, Register src);
+ /*! cos.type dst src */
+ Instruction COS(Type type, Register dst, Register src);
+ /*! sin.type dst src */
+ Instruction SIN(Type type, Register dst, Register src);
+ /*! rcp.type dst src */
+ Instruction RCP(Type type, Register dst, Register src);
+ /*! abs.type dst src */
+ Instruction ABS(Type type, Register dst, Register src);
+ /*! log.type dst src */
+ Instruction LOG(Type type, Register dst, Register src);
+ /*! sqr.type dst src */
+ Instruction SQR(Type type, Register dst, Register src);
+ /*! rsq.type dst src */
+ Instruction RSQ(Type type, Register dst, Register src);
+ /*! rndd.type dst src */
+ Instruction RNDD(Type type, Register dst, Register src);
+ /*! rnde.type dst src */
+ Instruction RNDE(Type type, Register dst, Register src);
+ /*! rndu.type dst src */
+ Instruction RNDU(Type type, Register dst, Register src);
+ /*! rndz.type dst src */
+ Instruction RNDZ(Type type, Register dst, Register src);
+ /*! pow.type dst src0 src1 */
+ Instruction POW(Type type, Register dst, Register src0, Register src1);
+ /*! mul.type dst src0 src1 */
+ Instruction MUL(Type type, Register dst, Register src0, Register src1);
+ /*! add.type dst src0 src1 */
+ Instruction ADD(Type type, Register dst, Register src0, Register src1);
+ /*! sub.type dst src0 src1 */
+ Instruction SUB(Type type, Register dst, Register src0, Register src1);
+ /*! div.type dst src0 src1 */
+ Instruction DIV(Type type, Register dst, Register src0, Register src1);
+ /*! rem.type dst src0 src1 */
+ Instruction REM(Type type, Register dst, Register src0, Register src1);
+ /*! shl.type dst src0 src1 */
+ Instruction SHL(Type type, Register dst, Register src0, Register src1);
+ /*! shr.type dst src0 src1 */
+ Instruction SHR(Type type, Register dst, Register src0, Register src1);
+ /*! asr.type dst src0 src1 */
+ Instruction ASR(Type type, Register dst, Register src0, Register src1);
+ /*! bsf.type dst src0 src1 */
+ Instruction BSF(Type type, Register dst, Register src0, Register src1);
+ /*! bsb.type dst src0 src1 */
+ Instruction BSB(Type type, Register dst, Register src0, Register src1);
+ /*! or.type dst src0 src1 */
+ Instruction OR(Type type, Register dst, Register src0, Register src1);
+ /*! xor.type dst src0 src1 */
+ Instruction XOR(Type type, Register dst, Register src0, Register src1);
+ /*! and.type dst src0 src1 */
+ Instruction AND(Type type, Register dst, Register src0, Register src1);
+ /*! sel.type dst {cond, src0, src1} (== src) */
+ Instruction SEL(Type type, Register dst, Tuple src);
+ /*! eq.type dst src0 src1 */
+ Instruction EQ(Type type, Register dst, Register src0, Register src1);
+ /*! ne.type dst src0 src1 */
+ Instruction NE(Type type, Register dst, Register src0, Register src1);
+ /*! le.type dst src0 src1 */
+ Instruction LE(Type type, Register dst, Register src0, Register src1);
+ /*! lt.type dst src0 src1 */
+ Instruction LT(Type type, Register dst, Register src0, Register src1);
+ /*! ge.type dst src0 src1 */
+ Instruction GE(Type type, Register dst, Register src0, Register src1);
+ /*! gt.type dst src0 src1 */
+ Instruction GT(Type type, Register dst, Register src0, Register src1);
+ /*! cvt.{dstType <- srcType} dst src */
+ Instruction CVT(Type dstType, Type srcType, Register dst, Register src);
+ /*! bra labelIndex */
+ Instruction BRA(LabelIndex labelIndex);
+ /*! (pred) bra labelIndex */
+ Instruction BRA(LabelIndex labelIndex, Register pred);
+ /*! ret */
+ Instruction RET(void);
+ /*! load.type.space {dst1,...,dst_valueNum} offset value */
+ Instruction LOAD(Type type, Tuple dst, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned);
+ /*! store.type.space offset {src1,...,src_valueNum} value */
+ Instruction STORE(Type type, Tuple src, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned);
+ /*! loadi.type dst value */
+ Instruction LOADI(Type type, Register dst, ImmediateIndex value);
+ /*! sync.params... (see Sync instruction) */
+ Instruction SYNC(uint32_t parameters);
+ /*! typed write TODO */
+ Instruction TYPED_WRITE(void);
+ /*! sample TODO */
+ Instruction SAMPLE(void);
+ /*! label labelIndex */
+ Instruction LABEL(LabelIndex labelIndex);
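+
+  /* Usage sketch: these factories only build Instruction values; a pass then
+   * inserts them or uses them to patch the stream. For example (assuming insn
+   * is an existing return instruction and index a label created beforehand):
+   *   const Instruction bra = BRA(index);
+   *   bra.replace(&insn);
+   * which is how returns are lowered in lowering.cpp below.
+   */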
+
+} /* namespace ir */
+} /* namespace gbe */
+
+#endif /* __GBE_IR_INSTRUCTION_HPP__ */
+
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
new file mode 100644
index 0000000..6aedc1f
--- /dev/null
+++ b/backend/src/ir/instruction.hxx
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file instruction.hxx
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+DECL_INSN(MOV, UnaryInstruction)
+DECL_INSN(COS, UnaryInstruction)
+DECL_INSN(SIN, UnaryInstruction)
+DECL_INSN(LOG, UnaryInstruction)
+DECL_INSN(SQR, UnaryInstruction)
+DECL_INSN(RSQ, UnaryInstruction)
+DECL_INSN(RCP, UnaryInstruction)
+DECL_INSN(ABS, UnaryInstruction)
+DECL_INSN(RNDD, UnaryInstruction)
+DECL_INSN(RNDE, UnaryInstruction)
+DECL_INSN(RNDU, UnaryInstruction)
+DECL_INSN(RNDZ, UnaryInstruction)
+DECL_INSN(POW, BinaryInstruction)
+DECL_INSN(MUL, BinaryInstruction)
+DECL_INSN(ADD, BinaryInstruction)
+DECL_INSN(SUB, BinaryInstruction)
+DECL_INSN(DIV, BinaryInstruction)
+DECL_INSN(REM, BinaryInstruction)
+DECL_INSN(SHL, BinaryInstruction)
+DECL_INSN(SHR, BinaryInstruction)
+DECL_INSN(ASR, BinaryInstruction)
+DECL_INSN(BSF, BinaryInstruction)
+DECL_INSN(BSB, BinaryInstruction)
+DECL_INSN(OR, BinaryInstruction)
+DECL_INSN(XOR, BinaryInstruction)
+DECL_INSN(AND, BinaryInstruction)
+DECL_INSN(SEL, SelectInstruction)
+DECL_INSN(EQ, CompareInstruction)
+DECL_INSN(NE, CompareInstruction)
+DECL_INSN(LE, CompareInstruction)
+DECL_INSN(LT, CompareInstruction)
+DECL_INSN(GE, CompareInstruction)
+DECL_INSN(GT, CompareInstruction)
+DECL_INSN(CVT, ConvertInstruction)
+DECL_INSN(BRA, BranchInstruction)
+DECL_INSN(RET, BranchInstruction)
+DECL_INSN(LOADI, LoadImmInstruction)
+DECL_INSN(LOAD, LoadInstruction)
+DECL_INSN(STORE, StoreInstruction)
+DECL_INSN(TYPED_WRITE, TypedWriteInstruction)
+DECL_INSN(SAMPLE, SampleInstruction)
+DECL_INSN(SYNC, SyncInstruction)
+DECL_INSN(LABEL, LabelInstruction)
+
diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp
new file mode 100644
index 0000000..b0a4314
--- /dev/null
+++ b/backend/src/ir/liveness.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file liveness.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#include "ir/liveness.hpp"
+#include <sstream>
+
+namespace gbe {
+namespace ir {
+
+ Liveness::Liveness(Function &fn) : fn(fn) {
+ // Initialize UEVar and VarKill for each block
+ fn.foreachBlock([this](const BasicBlock &bb) { this->initBlock(bb); });
+ // Now compute the liveOut sets with an iterative data-flow analysis
+ this->computeLiveOut();
+ }
+
+ Liveness::~Liveness(void) {
+ for (auto &pair : liveness) GBE_SAFE_DELETE(pair.second);
+ }
+
+ void Liveness::initBlock(const BasicBlock &bb) {
+ GBE_ASSERT(liveness.contains(&bb) == false);
+ BlockInfo *info = GBE_NEW(BlockInfo, bb);
+ // Traverse all instructions to handle UEVar and VarKill
+ const_cast<BasicBlock&>(bb).foreach([this, info](const Instruction &insn) {
+ this->initInstruction(*info, insn);
+ });
+ liveness[&bb] = info;
+ }
+
+ void Liveness::initInstruction(BlockInfo &info, const Instruction &insn) {
+ const uint32_t srcNum = insn.getSrcNum();
+ const uint32_t dstNum = insn.getDstNum();
+ // First look for used before killed
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+ const Register reg = insn.getSrc(srcID);
+ // Not killed -> it is really an upward use
+ if (info.varKill.contains(reg) == false)
+ info.upwardUsed.insert(reg);
+ }
+ // A destination is a killed value
+ for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
+ const Register reg = insn.getDst(dstID);
+ info.varKill.insert(reg);
+ }
+ }
+
+ void Liveness::computeLiveOut(void) {
+ // First insert the UEVar from the successors
+ foreach<DF_SUCC>([](BlockInfo &info, const BlockInfo &succ) {
+ const UEVar &ueVarSet = succ.upwardUsed;
+ // Iterate over all the registers in the UEVar of our successor
+ for (auto ueVar : ueVarSet) info.liveOut.insert(ueVar);
+ });
+ // Now iterate on liveOut
+ bool changed = true;
+ while (changed) {
+ changed = false;
+ foreach<DF_SUCC>([&changed](BlockInfo &info, const BlockInfo &succ) {
+ const UEVar &killSet = succ.varKill;
+ const LiveOut &liveOut = succ.liveOut;
+ // Iterate over all the registers in the liveOut of our successor
+ for (auto living : liveOut) {
+ if (killSet.contains(living)) continue;
+ if (info.liveOut.contains(living)) continue;
+ info.liveOut.insert(living);
+ changed = true;
+ }
+ });
+ }
+ }
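+
+  // For reference, the fixed point computed above is the classical backward
+  // data-flow equation:
+  //   liveOut(b) = U over s in succ(b) of [ UEVar(s) U (liveOut(s) \ varKill(s)) ]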
+
+ /*! To pretty print the liveness info */
+ static const uint32_t prettyInsnStrSize = 48;
+ static const uint32_t prettyRegStrSize = 5;
+
+ /*! Describe how the register is used */
+ static const uint32_t USE_NONE = 0;
+ static const uint32_t USE_READ = 1 << 0;
+ static const uint32_t USE_WRITTEN = 1 << 1;
+
+ enum UsePosition {
+ POS_BEFORE = 0,
+ POS_HERE = 1,
+ POS_AFTER = 2
+ };
+} /* namespace ir */
+} /* namespace gbe */
+
diff --git a/backend/src/ir/liveness.hpp b/backend/src/ir/liveness.hpp
new file mode 100644
index 0000000..ea5a157
--- /dev/null
+++ b/backend/src/ir/liveness.hpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file liveness.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#ifndef __GBE_IR_LIVENESS_HPP__
+#define __GBE_IR_LIVENESS_HPP__
+
+#include "sys/map.hpp"
+#include "sys/set.hpp"
+#include "ir/register.hpp"
+#include "ir/function.hpp"
+
+namespace gbe {
+namespace ir {
+
+ // Liveness is computed per function
+ class Function;
+
+ /*! To choose the iteration direction, we either look at predecessors or
+ * successors
+ */
+ enum DataFlowDirection {
+ DF_PRED = 0,
+ DF_SUCC = 1
+ };
+
+ /*! Compute liveness of each register */
+ class Liveness : public NonCopyable
+ {
+ public:
+ Liveness(Function &fn);
+ ~Liveness(void);
+ /*! Set of variables used upwards in the block (before a definition) */
+ typedef set<Register> UEVar;
+ /*! Set of variables alive at the exit of the block */
+ typedef set<Register> LiveOut;
+ /*! Set of variables actually killed in each block */
+ typedef set<Register> VarKill;
+ /*! Per-block info */
+ struct BlockInfo : public NonCopyable {
+ BlockInfo(const BasicBlock &bb) : bb(bb) {}
+ const BasicBlock &bb;
+ INLINE bool inUpwardUsed(Register reg) const {
+ return upwardUsed.contains(reg);
+ }
+ INLINE bool inLiveOut(Register reg) const {
+ return liveOut.contains(reg);
+ }
+ INLINE bool inVarKill(Register reg) const {
+ return varKill.contains(reg);
+ }
+ UEVar upwardUsed;
+ LiveOut liveOut;
+ VarKill varKill;
+ };
+ /*! Gives for each block the variables alive at entry / exit */
+ typedef map<const BasicBlock*, BlockInfo*> Info;
+ /*! Return the complete liveness info */
+ INLINE const Info &getLivenessInfo(void) const { return liveness; }
+ /*! Return the complete block info */
+ INLINE const BlockInfo &getBlockInfo(const BasicBlock *bb) const {
+ auto it = liveness.find(bb);
+ GBE_ASSERT(it != liveness.end() && it->second != NULL);
+ return *it->second;
+ }
+ /*! Get the set of registers alive at the end of the block */
+ const LiveOut &getLiveOut(const BasicBlock *bb) const {
+ const BlockInfo &info = this->getBlockInfo(bb);
+ return info.liveOut;
+ }
+ /*! Return the function the liveness was computed on */
+ INLINE const Function &getFunction(void) const { return fn; }
+ /*! Actually do something for each successor / predecessor of *all* blocks */
+ template <DataFlowDirection dir, typename T>
+ void foreach(const T &functor) {
+ // Iterate on all blocks
+ for (const auto &pair : liveness) {
+ BlockInfo &info = *pair.second;
+ const BasicBlock &bb = info.bb;
+ const BlockSet *set = NULL;
+ if (dir == DF_SUCC)
+ set = &bb.getSuccessorSet();
+ else
+ set = &bb.getPredecessorSet();
+ // Iterate over all successors (or predecessors, depending on dir)
+ for (auto other : *set) {
+ auto otherInfo = liveness.find(other);
+ GBE_ASSERT(otherInfo != liveness.end() && otherInfo->second != NULL);
+ functor(info, *otherInfo->second);
+ }
+ }
+ }
+ private:
+ /*! Store the liveness of all blocks */
+ Info liveness;
+ /*! Compute the liveness for this function */
+ Function &fn;
+ /*! Initialize UEVar and VarKill per block */
+ void initBlock(const BasicBlock &bb);
+ /*! Initialize UEVar and VarKill per instruction */
+ void initInstruction(BlockInfo &info, const Instruction &insn);
+ /*! Now really compute LiveOut based on UEVar and VarKill */
+ void computeLiveOut(void);
+ /*! Use custom allocators */
+ GBE_CLASS(Liveness);
+ };
+
+ /*! Output a nice ASCII representation of the liveness */
+ std::ostream &operator<< (std::ostream &out, const Liveness &liveness);
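+
+  /* Usage sketch (assuming fn is a fully built Function):
+   *   Liveness liveness(fn);
+   *   const Liveness::LiveOut &out = liveness.getLiveOut(&fn.getBottomBlock());
+   * out then contains every register still alive when leaving that block.
+   */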
+
+} /* namespace ir */
+} /* namespace gbe */
+
+#endif /* __GBE_IR_LIVENESS_HPP__ */
+
diff --git a/backend/src/ir/lowering.cpp b/backend/src/ir/lowering.cpp
new file mode 100644
index 0000000..6cccaf5
--- /dev/null
+++ b/backend/src/ir/lowering.cpp
@@ -0,0 +1,380 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file lowering.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "ir/context.hpp"
+#include "ir/value.hpp"
+#include "ir/liveness.hpp"
+#include "sys/set.hpp"
+
+namespace gbe {
+namespace ir {
+
+ /*! Small helper class to lower return instructions */
+ class ContextReturn : public Context
+ {
+ public:
+ /*! Initialize a context dedicated to return instruction lowering */
+ ContextReturn(Unit &unit) : Context(unit) {
+ this->usedLabels = GBE_NEW_NO_ARG(vector<uint8_t>);
+ }
+ /*! Lower the return instruction to gotos for the given function */
+ void lower(const std::string &functionName);
+ };
+
+ void ContextReturn::lower(const std::string &functionName) {
+ if ((this->fn = unit.getFunction(functionName)) == NULL)
+ return;
+
+ // Append a new block at the end of the function with a return instruction:
+ // the only one we are going to have
+ this->bb = &this->fn->getBottomBlock();
+ const LabelIndex index = this->label();
+ this->LABEL(index);
+ const BasicBlock *lastBlock = this->bb;
+ this->RET();
+
+ // Now traverse all instructions and replace all returns by GOTO index
+ fn->foreachInstruction([&](Instruction &insn) {
+ if (insn.getParent() == lastBlock) return; // This is the last block
+ if (insn.getOpcode() != OP_RET) return;
+ const Instruction bra = ir::BRA(index);
+ bra.replace(&insn);
+ });
+ }
+
+ void lowerReturn(Unit &unit, const std::string &functionName) {
+ ContextReturn ctx(unit);
+ ctx.lower(functionName);
+ }
+
+ /*! Characterizes how the argument is used (directly read, indirectly read,
+ * written)
+ */
+ enum ArgUse {
+ ARG_DIRECT_READ = 0,
+ ARG_INDIRECT_READ = 1,
+ ARG_WRITTEN = 2
+ };
+
+ /*! Book-keeping for the sequence of instructions that directly load an input
+ * argument
+ */
+ struct LoadAddImm {
+ Instruction *load; //!< Load from the argument
+ Instruction *add; //!< Can be NULL if we only have load(arg)
+ Instruction *loadImm; //!< Can also be NULL
+ uint64_t offset; //!< Offset where to load in the structure
+ uint32_t argID; //!< Associated function argument
+ };
+
+ /*! List of direct loads */
+ typedef vector<LoadAddImm> LoadAddImmSeq;
+
+ /*! Helper class to lower function arguments if required */
+ class FunctionArgumentLowerer : public Context
+ {
+ public:
+ /*! Build the helper structure */
+ FunctionArgumentLowerer(Unit &unit);
+ /*! Free everything we needed */
+ virtual ~FunctionArgumentLowerer(void);
+ /*! Perform all function arguments substitution if needed */
+ void lower(const std::string &name);
+ /*! Lower the given function argument accesses */
+ void lower(uint32_t argID);
+ /*! Build the constant push for the function */
+ void buildConstantPush(void);
+ /*! Inspect the given function argument to see how it is used. If it is
+ * accessed by direct loads only, we also output the list of instructions
+ * used for each load
+ */
+ ArgUse getArgUse(uint32_t argID);
+ /*! Recursively look for a store among the uses of the given definition */
+ bool useStore(const ValueDef &def, set<const Instruction*> &visited);
+ /*! Check whether the pointer is only used by loads with immediate offsets */
+ bool matchLoadAddImm(uint32_t argID);
+ Liveness *liveness; //!< To compute the function graph
+ FunctionDAG *dag; //!< Contains complete dependency information
+ LoadAddImmSeq seq; //!< All the direct loads
+ };
+
+ INLINE uint64_t getOffsetFromImm(const Immediate &imm) {
+ switch (imm.type) {
+ // bit-cast these ones
+ case TYPE_DOUBLE:
+ case TYPE_FLOAT:
+ case TYPE_S64:
+ case TYPE_U64:
+ case TYPE_U32:
+ case TYPE_U16:
+ case TYPE_U8: return imm.data.u64;
+ // sign extend these ones
+ case TYPE_S32: return int64_t(imm.data.s32);
+ case TYPE_S16: return int64_t(imm.data.s16);
+ case TYPE_S8: return int64_t(imm.data.s8);
+ case TYPE_BOOL:
+ case TYPE_HALF: NOT_SUPPORTED; return 0;
+ }
+ return 0;
+ }
+
+ bool matchLoad(Instruction *insn,
+ Instruction *add,
+ Instruction *loadImm,
+ uint64_t offset,
+ uint32_t argID,
+ LoadAddImm &loadAddImm)
+ {
+ const Opcode opcode = insn->getOpcode();
+
+ if (opcode == OP_LOAD) {
+ LoadInstruction *load = cast<LoadInstruction>(insn);
+ if (load->getAddressSpace() != MEM_PRIVATE)
+ return false;
+ loadAddImm.load = insn;
+ loadAddImm.add = add;
+ loadAddImm.loadImm = loadImm;
+ loadAddImm.offset = offset;
+ loadAddImm.argID = argID;
+ return true;
+ } else
+ return false;
+ }
+
+
+ FunctionArgumentLowerer::FunctionArgumentLowerer(Unit &unit) :
+ Context(unit), liveness(NULL), dag(NULL) {}
+ FunctionArgumentLowerer::~FunctionArgumentLowerer(void) {
+ GBE_SAFE_DELETE(dag);
+ GBE_SAFE_DELETE(liveness);
+ }
+
+ void FunctionArgumentLowerer::lower(const std::string &functionName) {
+ if ((this->fn = unit.getFunction(functionName)) == NULL)
+ return;
+ GBE_SAFE_DELETE(dag);
+ GBE_SAFE_DELETE(liveness);
+ this->liveness = GBE_NEW(ir::Liveness, *fn);
+ this->dag = GBE_NEW(ir::FunctionDAG, *this->liveness);
+
+ // Process all structure arguments and find all the direct loads we can
+ // replace
+ const uint32_t argNum = fn->argNum();
+ for (uint32_t argID = 0; argID < argNum; ++argID) {
+ FunctionArgument &arg = fn->getArg(argID);
+ if (arg.type != FunctionArgument::STRUCTURE) continue;
+ this->lower(argID);
+ }
+
+ // Build the constant push description and remove the instructions that
+ // therefore become useless
+ this->buildConstantPush();
+ }
+
+// Remove all the given instructions from the stream (if dead)
+#define REMOVE_INSN(WHICH) \
+ for (const auto &loadAddImm : seq) { \
+ Instruction *WHICH = loadAddImm.WHICH; \
+ if (WHICH == NULL) continue; \
+ const UseSet &useSet = dag->getUse(WHICH, 0); \
+ bool isDead = true; \
+ for (auto use : useSet) { \
+ if (dead.contains(use->getInstruction()) == false) { \
+ isDead = false; \
+ break; \
+ } \
+ } \
+ if (isDead) { \
+ dead.insert(WHICH); \
+ WHICH->remove(); \
+ } \
+ }
+
+ void FunctionArgumentLowerer::buildConstantPush(void)
+ {
+ if (seq.size() == 0)
+ return;
+
+ // Track instructions we remove to recursively kill them properly
+ set<const Instruction*> dead;
+
+ // The argument locations we have already pushed (the same argument location
+ // can be used several times)
+ set<PushLocation> inserted;
+ for (const auto &loadAddImm : seq) {
+ LoadInstruction *load = cast<LoadInstruction>(loadAddImm.load);
+ const uint32_t valueNum = load->getValueNum();
+ for (uint32_t valueID = 0; valueID < valueNum; ++valueID) {
+ const Type type = load->getValueType();
+ const RegisterFamily family = getFamily(type);
+ const uint32_t size = getFamilySize(family);
+ const uint32_t offset = loadAddImm.offset + valueID * size;
+ const PushLocation argLocation(*fn, loadAddImm.argID, offset);
+ if (inserted.contains(argLocation))
+ continue;
+ const Register reg = load->getValue(valueID);
+ const Register pushed = fn->newRegister(family);
+
+ // TODO the MOV instruction can most of the time be avoided if the
+ // register is never written. We must however support register
+ // replacement in the instruction interface to be able to patch all the
+ // instructions that use "reg"
+ const Instruction mov = ir::MOV(type, reg, pushed);
+ mov.replace(load);
+ dead.insert(load);
+ this->appendPushedConstant(pushed, argLocation);
+ }
+ }
+
+ // Remove all unused adds and load immediates
+ REMOVE_INSN(add)
+ REMOVE_INSN(loadImm)
+ }
+
+#undef REMOVE_INSN
+
+ bool FunctionArgumentLowerer::useStore(const ValueDef &def, set<const Instruction*> &visited)
+ {
+ const UseSet &useSet = dag->getUse(def);
+ for (const auto &use : useSet) {
+ const Instruction *insn = use->getInstruction();
+ const uint32_t srcID = use->getSrcID();
+ const Opcode opcode = insn->getOpcode();
+ if (visited.contains(insn)) continue;
+ visited.insert(insn);
+ if (opcode == OP_STORE && srcID == StoreInstruction::addressIndex)
+ return true;
+ if (insn->isMemberOf<UnaryInstruction>() == false &&
+ insn->isMemberOf<BinaryInstruction>() == false)
+ continue;
+ else {
+ const uint32_t dstNum = insn->getDstNum();
+ for (uint32_t dstID = 0; dstID < dstNum; ++dstID)
+ if (this->useStore(ValueDef(insn, dstID), visited) == true)
+ return true;
+ }
+ }
+ return false;
+ }
+
+ bool FunctionArgumentLowerer::matchLoadAddImm(uint32_t argID)
+ {
+ const FunctionArgument &arg = fn->getArg(argID);
+ LoadAddImmSeq tmpSeq;
+
+ // Inspect all uses of the function argument pointer
+ const UseSet &useSet = dag->getUse(&arg);
+ for (auto use : useSet) {
+ Instruction *insn = const_cast<Instruction*>(use->getInstruction());
+ const Opcode opcode = insn->getOpcode();
+
+ // load dst arg
+ LoadAddImm loadAddImm;
+ if (matchLoad(insn, NULL, NULL, 0, argID, loadAddImm)) {
+ tmpSeq.push_back(loadAddImm);
+ continue;
+ }
+
+ // add.ptr_type dst ptr other
+ if (opcode != OP_ADD) return false;
+ BinaryInstruction *add = cast<BinaryInstruction>(insn);
+ const Type addType = add->getType();
+ const RegisterFamily family = getFamily(addType);
+ if (family != unit.getPointerFamily()) return false;
+ if (addType == TYPE_FLOAT) return false;
+
+ // step 1 -> check that the other source comes from a load immediate
+ const uint32_t srcID = use->getSrcID();
+ const uint32_t otherID = srcID ^ 1;
+ const DefSet &defSet = dag->getDef(insn, otherID);
+ const uint32_t defNum = defSet.size();
+ if (defNum == 0 || defNum > 1) continue; // undefined or more than one def
+ const ValueDef *otherDef = *defSet.begin();
+ if (otherDef->getType() != ValueDef::DEF_INSN_DST) return false;
+ Instruction *otherInsn = const_cast<Instruction*>(otherDef->getInstruction());
+ if (otherInsn->getOpcode() != OP_LOADI) return false;
+ LoadImmInstruction *loadImm = cast<LoadImmInstruction>(otherInsn);
+ const Immediate imm = loadImm->getImmediate();
+ const uint64_t offset = getOffsetFromImm(imm);
+
+ // step 2 -> check that the results of the add are loads from private
+ // memory
+ const UseSet &addUseSet = dag->getUse(add, 0);
+ for (auto addUse : addUseSet) {
+ Instruction *insn = const_cast<Instruction*>(addUse->getInstruction());
+
+ // We finally find something like load dst arg+imm
+ LoadAddImm loadAddImm;
+ if (matchLoad(insn, add, loadImm, offset, argID, loadAddImm)) {
+ tmpSeq.push_back(loadAddImm);
+ continue;
+ }
+ }
+ }
+
+ // OK, the argument only needs direct loads. We can now append all the
+ // direct load definitions we found
+ for (const auto &loadImmSeq : tmpSeq)
+ seq.push_back(loadImmSeq);
+ return true;
+ }
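+
+  // For reference, the pattern recognized above is, in IR pseudo-code:
+  //   loadi.s32 imm  offset          (OP_LOADI)
+  //   add.ptr   addr arg imm         (OP_ADD, pointer-sized, non-float)
+  //   load.xxx  dst  addr            (OP_LOAD from private memory)
+  // or directly load.xxx dst arg when the offset is zero.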
+
+ ArgUse FunctionArgumentLowerer::getArgUse(uint32_t argID)
+ {
+ FunctionArgument &arg = fn->getArg(argID);
+
+ // case 1 - we may store something to the structure argument
+ set<const Instruction*> visited;
+ if (this->useStore(ValueDef(&arg), visited))
+ return ARG_WRITTEN;
+
+ // case 2 - we look for the patterns: LOAD(ptr) or LOAD(ptr+imm)
+ if (this->matchLoadAddImm(argID))
+ return ARG_DIRECT_READ;
+
+ // case 3 - LOAD(ptr+runtime_value)
+ return ARG_INDIRECT_READ;
+ }
+
+ void FunctionArgumentLowerer::lower(uint32_t argID) {
+ IF_DEBUG(const ArgUse argUse = )this->getArgUse(argID);
+#if GBE_DEBUG
+ GBE_ASSERTM(argUse != ARG_WRITTEN,
+ "TODO A store to a structure argument "
+ "(i.e. not a char/short/int/float argument) has been found. "
+ "This is not supported yet");
+ GBE_ASSERTM(argUse != ARG_INDIRECT_READ,
+ "TODO Only direct loads of structure arguments are "
+ "supported now");
+#endif /* GBE_DEBUG */
+ }
+
+ void lowerFunctionArguments(Unit &unit, const std::string &functionName) {
+ FunctionArgumentLowerer lowerer(unit);
+ lowerer.lower(functionName);
+ }
+
+} /* namespace ir */
+} /* namespace gbe */
+
diff --git a/backend/src/ir/lowering.hpp b/backend/src/ir/lowering.hpp
new file mode 100644
index 0000000..ba0c87b
--- /dev/null
+++ b/backend/src/ir/lowering.hpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file lowering.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ * Lower instructions that are not directly supported. Typical examples are
+ * return handling and unsupported vector scatters / gathers
+ */
+
+#ifndef __GBE_IR_LOWERING_HPP__
+#define __GBE_IR_LOWERING_HPP__
+
+namespace gbe {
+namespace ir {
+
+ // Structure to update
+ class Unit;
+
+ /*! Remove all return instructions and replace them with forward branches that
+ * point to the only return instruction, placed in a dedicated basic block at
+ * the end of the function.
+ * Typically this code:
+ *
+ * dst[x] = 1;
+ * if (x > 4) return;
+ * dst[x] = 3;
+ *
+ * will be replaced by:
+ *
+ * dst[x] = 1;
+ * if (x > 4) goto end;
+ * dst[x] = 3;
+ * end:
+ * return;
+ *
+ * There will be only one return at the end of the function. This return will
+ * be simply encoded as an End-Of-Thread (EOT) instruction
+ */
+ void lowerReturn(Unit &unit, const std::string &functionName);
+
+ /*! Function arguments are a bit tricky since we must implement the proper C
+ * semantics: the arguments may be addressed in any way and may even be
+ * modified. This leads to interesting challenges. We identify
+ * several cases:
+ *
+ * case 1:
+ * int f (__global int *dst, int x[16], int y) {
+ * dst[get_global_id(0)] = x[16] + y;
+ * }
+ * Here x and y will be pushed to registers using the Curbe. No problem, we
+ * can directly use the pushed registers
+ *
+ * case 2:
+ * int f (__global int *dst, int x[16], int y) {
+ * dst[get_global_id(0)] = x[get_local_id(0)] + y;
+ * }
+ * Here x is indirectly accessed. We need to perform a gather from memory. We
+ * can simply gather it from the curbe in memory
+ *
+ * case 3:
+ * int f (__global int *dst, int x[16], int y) {
+ * x[get_local_id(0)] = y + 1;
+ * int *ptr = get_local_id(0) % 2 ? x[0] : x[1];
+ * dst[get_global_id(0)] = *ptr;
+ * }
+ * Here we modify the function argument since it is valid C. The problem is
+ * that we are running in SIMD mode while the data are scalar (in both memory
+ * and registers). In that case, we just spill everything to memory (using the
+ * stack) and reload it from there when needed.
+ */
+ void lowerFunctionArguments(Unit &unit, const std::string &functionName);
+
+} /* namespace ir */
+} /* namespace gbe */
+
+#endif /* __GBE_IR_LOWERING_HPP__ */
+
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
new file mode 100644
index 0000000..b8c7604
--- /dev/null
+++ b/backend/src/ir/profile.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file profile.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#include "ir/profile.hpp"
+#include "ir/function.hpp"
+#include "sys/platform.hpp"
+
+namespace gbe {
+namespace ir {
+
+ namespace ocl
+ {
+#if GBE_DEBUG
+#define DECL_NEW_REG(FAMILY, REG) \
+ r = fn.newRegister(FAMILY); \
+ GBE_ASSERT(r == REG);
+#else
+#define DECL_NEW_REG(FAMILY, REG) \
+ fn.newRegister(FAMILY);
+#endif /* GBE_DEBUG */
+ static void init(Function &fn) {
+ IF_DEBUG(Register r);
+ DECL_NEW_REG(FAMILY_DWORD, lid0);
+ DECL_NEW_REG(FAMILY_DWORD, lid1);
+ DECL_NEW_REG(FAMILY_DWORD, lid2);
+ DECL_NEW_REG(FAMILY_DWORD, groupid0);
+ DECL_NEW_REG(FAMILY_DWORD, groupid1);
+ DECL_NEW_REG(FAMILY_DWORD, groupid2);
+ DECL_NEW_REG(FAMILY_DWORD, numgroup0);
+ DECL_NEW_REG(FAMILY_DWORD, numgroup1);
+ DECL_NEW_REG(FAMILY_DWORD, numgroup2);
+ DECL_NEW_REG(FAMILY_DWORD, lsize0);
+ DECL_NEW_REG(FAMILY_DWORD, lsize1);
+ DECL_NEW_REG(FAMILY_DWORD, lsize2);
+ DECL_NEW_REG(FAMILY_DWORD, gsize0);
+ DECL_NEW_REG(FAMILY_DWORD, gsize1);
+ DECL_NEW_REG(FAMILY_DWORD, gsize2);
+ DECL_NEW_REG(FAMILY_DWORD, goffset0);
+ DECL_NEW_REG(FAMILY_DWORD, goffset1);
+ DECL_NEW_REG(FAMILY_DWORD, goffset2);
+ DECL_NEW_REG(FAMILY_DWORD, stackptr);
+ DECL_NEW_REG(FAMILY_WORD, blockip);
+ DECL_NEW_REG(FAMILY_DWORD, barrierid);
+ DECL_NEW_REG(FAMILY_DWORD, threadn);
+ }
+#undef DECL_NEW_REG
+
+ } /* namespace ocl */
+
+ void initProfile(Function &fn) {
+ const Profile profile = fn.getProfile();
+ switch (profile) {
+ case PROFILE_C: GBE_ASSERTM(false, "Unsupported profile"); break;
+ case PROFILE_OCL: ocl::init(fn);
+ };
+ }
+
+} /* namespace ir */
+} /* namespace gbe */
+
+
diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp
new file mode 100644
index 0000000..d5bb7a6
--- /dev/null
+++ b/backend/src/ir/profile.hpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file profile.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#ifndef __GBE_IR_PROFILE_HPP__
+#define __GBE_IR_PROFILE_HPP__
+
+#include "ir/register.hpp"
+
+namespace gbe {
+namespace ir {
+
+ /*! A profile is defined *per-function* and mostly specifies predefined registers */
+ enum Profile : uint32_t {
+ PROFILE_C = 0, // Not used now
+ PROFILE_OCL = 1
+ };
+
+ // Will be pre-initialized based on its profile
+ class Function;
+
+ /*! Registers used for ocl */
+ namespace ocl
+ {
+ static const Register lid0 = Register(0); // get_local_id(0)
+ static const Register lid1 = Register(1); // get_local_id(1)
+ static const Register lid2 = Register(2); // get_local_id(2)
+ static const Register groupid0 = Register(3); // get_group_id(0)
+ static const Register groupid1 = Register(4); // get_group_id(1)
+ static const Register groupid2 = Register(5); // get_group_id(2)
+ static const Register numgroup0 = Register(6); // get_num_groups(0)
+ static const Register numgroup1 = Register(7); // get_num_groups(1)
+ static const Register numgroup2 = Register(8); // get_num_groups(2)
+ static const Register lsize0 = Register(9); // get_local_size(0)
+ static const Register lsize1 = Register(10); // get_local_size(1)
+ static const Register lsize2 = Register(11); // get_local_size(2)
+ static const Register gsize0 = Register(12); // get_global_size(0)
+ static const Register gsize1 = Register(13); // get_global_size(1)
+ static const Register gsize2 = Register(14); // get_global_size(2)
+ static const Register goffset0 = Register(15); // get_global_offset(0)
+ static const Register goffset1 = Register(16); // get_global_offset(1)
+ static const Register goffset2 = Register(17); // get_global_offset(2)
+ static const Register stackptr = Register(18); // stack pointer
+ static const Register blockip = Register(19); // blockip
+ static const Register barrierid = Register(20);// barrierid
+ static const Register threadn = Register(21); // number of threads
+ static const uint32_t regNum = 22; // number of special registers
+ } /* namespace ocl */
+
+ /*! Initialize the profile of the given function */
+ void initProfile(Function &fn);
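+
+  /* Usage sketch (dst is a hypothetical register allocated elsewhere): once
+   * initProfile() has reserved the registers above, a pass can refer to them
+   * directly, e.g. reading get_local_id(0) with:
+   *   const Instruction mov = MOV(TYPE_U32, dst, ocl::lid0);
+   */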
+
+} /* namespace ir */
+} /* namespace gbe */
+
+#endif /* __GBE_IR_PROFILE_HPP__ */
+
diff --git a/backend/src/ir/register.cpp b/backend/src/ir/register.cpp
new file mode 100644
index 0000000..12fc941
--- /dev/null
+++ b/backend/src/ir/register.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file register.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#include "ir/register.hpp"
+
+namespace gbe {
+namespace ir {
+
+ std::ostream &operator<< (std::ostream &out, const RegisterData &regData)
+ {
+ switch (regData.family) {
+ case FAMILY_BOOL: return out << "bool";
+ case FAMILY_BYTE: return out << "byte";
+ case FAMILY_WORD: return out << "word";
+ case FAMILY_DWORD: return out << "dword";
+ case FAMILY_QWORD: return out << "qword";
+ };
+ return out;
+ }
+
+ std::ostream &operator<< (std::ostream &out, const RegisterFile &file)
+ {
+ out << "## " << file.regNum() << " register"
+ << (file.regNum() != 1 ? "s" : "") << " ##" << std::endl;
+ for (uint32_t i = 0; i < file.regNum(); ++i) {
+ const RegisterData reg = file.get(Register(i));
+ out << ".decl." << reg << " %" << i << std::endl;
+ }
+ return out;
+ }
+
+ Tuple RegisterFile::appendArrayTuple(const Register *reg, uint32_t regNum) {
+ const Tuple index = Tuple(regTuples.size());
+ for (uint32_t regID = 0; regID < regNum; ++regID) {
+ GBE_ASSERTM(reg[regID] < this->regNum(), "Out-of-bound register");
+ regTuples.push_back(reg[regID]);
+ }
+ return index;
+ }
+
+} /* namespace ir */
+} /* namespace gbe */
+
diff --git a/backend/src/ir/register.hpp b/backend/src/ir/register.hpp
new file mode 100644
index 0000000..610acb1
--- /dev/null
+++ b/backend/src/ir/register.hpp
@@ -0,0 +1,158 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file register.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#ifndef __GBE_IR_REGISTER_HPP__
+#define __GBE_IR_REGISTER_HPP__
+
+#include "sys/vector.hpp"
+#include "sys/platform.hpp"
+
+namespace gbe {
+namespace ir {
+
+ /*! Defines the size of the pointers. All the functions from the unit will
+ * use the same pointer size as the unit they belong to
+ */
+ enum PointerSize {
+ POINTER_32_BITS = 32,
+ POINTER_64_BITS = 64
+ };
+
+ /*! Basically provides the size of the register */
+ enum RegisterFamily : uint8_t {
+ FAMILY_BOOL = 0,
+ FAMILY_BYTE = 1,
+ FAMILY_WORD = 2,
+ FAMILY_DWORD = 3,
+ FAMILY_QWORD = 4
+ };
+
+ INLINE uint32_t getFamilySize(RegisterFamily family) {
+ switch (family) {
+ case FAMILY_BYTE: return 1;
+ case FAMILY_WORD: return 2;
+ case FAMILY_DWORD: return 4;
+ case FAMILY_QWORD: return 8;
+ default: NOT_SUPPORTED;
+ };
+ return 0;
+ }
+
+ /*! A register can be either a byte, a word, a dword or a qword. We store this
+ * value into a register data (which makes the register file)
+ */
+ class RegisterData
+ {
+ public:
+ /*! Build a register. All fields will be immutable */
+ INLINE RegisterData(RegisterFamily family = FAMILY_DWORD) : family(family) {}
+ /*! Copy constructor */
+ INLINE RegisterData(const RegisterData &other) : family(other.family) {}
+ /*! Copy operator */
+ INLINE RegisterData &operator= (const RegisterData &other) {
+ this->family = other.family;
+ return *this;
+ }
+ /*! Nothing really happens here */
+ INLINE ~RegisterData(void) {}
+ RegisterFamily family; //!< Register size, or whether it is a (boolean) flag
+ GBE_CLASS(RegisterData);
+ };
+
+ /*! Output the register file string in the given stream */
+ std::ostream &operator<< (std::ostream &out, const RegisterData &regData);
+
+ /*! Register is the index of the register data in the register
+ * file. We enforce type safety with this class
+ */
+ TYPE_SAFE(Register, uint16_t)
+ INLINE bool operator< (const Register &r0, const Register &r1) {
+ return r0.value() < r1.value();
+ }
+
+ /*! Tuple is the position of the first register in the tuple vector. We
+ * enforce type safety with this class
+ */
+ TYPE_SAFE(Tuple, uint16_t)
+
+ /*! A register file allocates and destroys registers. Basically, we will have
+ * one register file per function
+ */
+ class RegisterFile
+ {
+ public:
+ /*! Return the index of a newly allocated register */
+ INLINE Register append(RegisterFamily family) {
+ GBE_ASSERTM(regNum() < MAX_INDEX,
+ "Too many defined registers (only 65535 are supported)");
+ const uint16_t index = regNum();
+ const RegisterData reg(family);
+ regs.push_back(reg);
+ return Register(index);
+ }
+ /*! Make a tuple from an array of registers */
+ Tuple appendArrayTuple(const Register *reg, uint32_t regNum);
+ /*! Make a tuple and return the index to the first element of the tuple */
+ template <typename First, typename... Rest>
+ INLINE Tuple appendTuple(First first, Rest... rest) {
+ const Tuple index = Tuple(regTuples.size());
+ GBE_ASSERTM(first < regNum(), "Out-of-bound register");
+ regTuples.push_back(first);
+ appendTuple(rest...);
+ return index;
+ }
+ /*! To terminate variadic recursion */
+ INLINE void appendTuple(void) {}
+ /*! Return a copy of the register at index */
+ INLINE RegisterData get(Register index) const { return regs[index]; }
+ /*! Get the register index from the tuple */
+ INLINE Register get(Tuple index, uint32_t which) const {
+ return regTuples[uint16_t(index) + which];
+ }
+ /*! Set the register index from the tuple */
+ INLINE void set(Tuple index, uint32_t which, Register reg) {
+ regTuples[uint16_t(index) + which] = reg;
+ }
+ /*! Number of registers in the register file */
+ INLINE uint32_t regNum(void) const { return regs.size(); }
+ /*! Number of tuples in the register file */
+ INLINE uint32_t tupleNum(void) const { return regTuples.size(); }
+ /*! register and tuple indices are short */
+ enum { MAX_INDEX = 0xffff };
+ private:
+ vector<RegisterData> regs; //!< All the registers together
+ vector<Register> regTuples; //!< Tuples are used for many src / dst
+ GBE_CLASS(RegisterFile);
+ };
+
+ /*! Useful to encode anything special */
+ static const Register invalidRegister(RegisterFile::MAX_INDEX);
+
+ /*! Output the register file string in the given stream */
+ std::ostream &operator<< (std::ostream &out, const RegisterFile &file);
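+
+  /* Usage sketch:
+   *   RegisterFile file;
+   *   const Register r0 = file.append(FAMILY_DWORD);
+   *   const Register r1 = file.append(FAMILY_QWORD);
+   *   const Tuple t = file.appendTuple(r0, r1);
+   *   // file.get(t, 0) == r0 and file.get(t, 1) == r1
+   */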
+
+} /* namespace ir */
+} /* namespace gbe */
+
+#endif /* __GBE_IR_REGISTER_HPP__ */
+
diff --git a/backend/src/ir/type.cpp b/backend/src/ir/type.cpp
new file mode 100644
index 0000000..a6a2e44
--- /dev/null
+++ b/backend/src/ir/type.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file type.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "ir/type.hpp"
+
+namespace gbe {
+namespace ir {
+ std::ostream &operator<< (std::ostream &out, const Type &type) {
+ switch (type) {
+ case TYPE_BOOL: return out << "bool";
+ case TYPE_S8: return out << "int8";
+ case TYPE_U8: return out << "uint8";
+ case TYPE_S16: return out << "int16";
+ case TYPE_U16: return out << "uint16";
+ case TYPE_S32: return out << "int32";
+ case TYPE_U32: return out << "uint32";
+ case TYPE_S64: return out << "int64";
+ case TYPE_U64: return out << "uint64";
+ case TYPE_HALF: return out << "half";
+ case TYPE_FLOAT: return out << "float";
+ case TYPE_DOUBLE: return out << "double";
+ };
+ return out;
+ }
+
+} /* namespace ir */
+} /* namespace gbe */
+
diff --git a/backend/src/ir/type.hpp b/backend/src/ir/type.hpp
new file mode 100644
index 0000000..1e24906
--- /dev/null
+++ b/backend/src/ir/type.hpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file type.hpp
+ *
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#ifndef __GBE_IR_TYPE_HPP__
+#define __GBE_IR_TYPE_HPP__
+
+#include "sys/platform.hpp"
+#include "ir/register.hpp"
+
+#include <ostream>
+
+namespace gbe {
+namespace ir {
+
+ /*! All types possibly supported by the instruction */
+ enum Type : uint8_t {
+ TYPE_BOOL = 0, //!< boolean value
+ TYPE_S8, //!< signed 8-bit integer
+ TYPE_U8, //!< unsigned 8-bit integer
+ TYPE_S16, //!< signed 16-bit integer
+ TYPE_U16, //!< unsigned 16-bit integer
+ TYPE_S32, //!< signed 32-bit integer
+ TYPE_U32, //!< unsigned 32-bit integer
+ TYPE_S64, //!< signed 64-bit integer
+ TYPE_U64, //!< unsigned 64-bit integer
+ TYPE_HALF, //!< 16-bit floating point value
+ TYPE_FLOAT, //!< 32-bit floating point value
+ TYPE_DOUBLE //!< 64-bit floating point value
+ };
+
+ /*! Output a string for the type in the given stream */
+ std::ostream &operator<< (std::ostream &out, const Type &type);
+
+ /*! Get the register family for each type */
+ INLINE RegisterFamily getFamily(Type type) {
+ switch (type) {
+ case TYPE_BOOL:
+ return FAMILY_BOOL;
+ case TYPE_S8:
+ case TYPE_U8:
+ return FAMILY_BYTE;
+ case TYPE_S16:
+ case TYPE_U16:
+ case TYPE_HALF:
+ return FAMILY_WORD;
+ case TYPE_S32:
+ case TYPE_U32:
+ case TYPE_FLOAT:
+ return FAMILY_DWORD;
+ case TYPE_S64:
+ case TYPE_U64:
+ case TYPE_DOUBLE:
+ return FAMILY_QWORD;
+ };
+ return FAMILY_DWORD;
+ }
+
+ /*! Return a type for each register family */
+ INLINE Type getType(RegisterFamily family) {
+ switch (family) {
+ case FAMILY_BOOL: return TYPE_BOOL;
+ case FAMILY_BYTE: return TYPE_U8;
+ case FAMILY_WORD: return TYPE_U16;
+ case FAMILY_DWORD: return TYPE_U32;
+ case FAMILY_QWORD: return TYPE_U64;
+ };
+ return TYPE_U32;
+ }
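+
+  /* For instance, getFamily(TYPE_FLOAT) == FAMILY_DWORD (32-bit registers),
+   * while the reverse mapping is lossy: getType(FAMILY_DWORD) == TYPE_U32.
+   */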
+
+} /* namespace ir */
+} /* namespace gbe */
+
+#endif /* __GBE_IR_TYPE_HPP__ */
+
diff --git a/backend/src/ir/unit.cpp b/backend/src/ir/unit.cpp
new file mode 100644
index 0000000..1e98afa
--- /dev/null
+++ b/backend/src/ir/unit.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file unit.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#include "ir/unit.hpp"
+#include "ir/function.hpp"
+
+namespace gbe {
+namespace ir {
+
+ Unit::Unit(PointerSize pointerSize) : pointerSize(pointerSize) {}
+ Unit::~Unit(void) {
+ for (const auto &pair : functions) GBE_DELETE(pair.second);
+ }
+ Function *Unit::getFunction(const std::string &name) const {
+ auto it = functions.find(name);
+ if (it == functions.end())
+ return NULL;
+ return it->second;
+ }
+ Function *Unit::newFunction(const std::string &name) {
+ auto it = functions.find(name);
+ if (it != functions.end())
+ return NULL;
+ Function *fn = GBE_NEW(Function, name, *this);
+ functions[name] = fn;
+ return fn;
+ }
+ void Unit::newConstant(const char *data,
+ const std::string &name,
+ uint32_t size,
+ uint32_t alignment)
+ {
+ constantSet.append(data, name, size, alignment);
+ }
+
+ std::ostream &operator<< (std::ostream &out, const Unit &unit) {
+ unit.apply([&out] (const Function &fn) { out << fn << std::endl; });
+ return out;
+ }
+} /* namespace ir */
+} /* namespace gbe */
+
diff --git a/backend/src/ir/unit.hpp b/backend/src/ir/unit.hpp
new file mode 100644
index 0000000..37a5dbf
--- /dev/null
+++ b/backend/src/ir/unit.hpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file unit.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#ifndef __GBE_IR_UNIT_HPP__
+#define __GBE_IR_UNIT_HPP__
+
+#include "ir/constant.hpp"
+#include "ir/register.hpp"
+#include "sys/hash_map.hpp"
+
+namespace gbe {
+namespace ir {
+
+ // A unit contains a set of functions
+ class Function;
+
+ /*! Complete unit of compilation. It contains a set of functions and a set of
+   *  constants the functions may refer to.
+ */
+ class Unit : public NonCopyable
+ {
+ public:
+ typedef hash_map<std::string, Function*> FunctionSet;
+ /*! Create an empty unit */
+ Unit(PointerSize pointerSize = POINTER_32_BITS);
+ /*! Release everything (*including* the function pointers) */
+ ~Unit(void);
+ /*! Get the set of functions defined in the unit */
+ const FunctionSet &getFunctionSet(void) const { return functions; }
+ /*! Retrieve the function by its name */
+ Function *getFunction(const std::string &name) const;
+    /*! Create a new function (returns NULL if the function already exists) */
+ Function *newFunction(const std::string &name);
+ /*! Create a new constant in the constant set */
+ void newConstant(const char*, const std::string&, uint32_t size, uint32_t alignment);
+ /*! Apply the given functor on all the functions */
+ template <typename T>
+ INLINE void apply(const T &functor) const {
+ for (const auto &pair : functions) functor(*pair.second);
+ }
+ /*! Return the size of the pointers manipulated */
+ INLINE PointerSize getPointerSize(void) const { return pointerSize; }
+ /*! Return the family of registers that contain pointer */
+ INLINE RegisterFamily getPointerFamily(void) const {
+ if (this->getPointerSize() == POINTER_32_BITS)
+ return FAMILY_DWORD;
+ else
+ return FAMILY_QWORD;
+ }
+ private:
+    friend class ContextInterface; //!< Can freely modify the unit
+ hash_map<std::string, Function*> functions; //!< All the defined functions
+ ConstantSet constantSet; //!< All the constants defined in the unit
+ PointerSize pointerSize; //!< Size shared by all pointers
+ GBE_CLASS(Unit);
+ };
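+
+  /* A minimal usage sketch of the Unit API (the names below are made up for
+   * illustration only):
+   *   Unit unit(POINTER_32_BITS);
+   *   Function *fn = unit.newFunction("my_kernel"); // NULL if it already exists
+   *   GBE_ASSERT(unit.getFunction("my_kernel") == fn);
+   *   unit.apply([](const Function &f) { ... });    // visit every function
+   */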
+
+ /*! Output the unit string in the given stream */
+ std::ostream &operator<< (std::ostream &out, const Unit &unit);
+
+} /* namespace ir */
+} /* namespace gbe */
+
+#endif /* __GBE_IR_UNIT_HPP__ */
+
diff --git a/backend/src/ir/value.cpp b/backend/src/ir/value.cpp
new file mode 100644
index 0000000..11eb0a2
--- /dev/null
+++ b/backend/src/ir/value.cpp
@@ -0,0 +1,594 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file value.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "ir/value.hpp"
+#include "ir/liveness.hpp"
+
+namespace gbe {
+namespace ir {
+
+ /*! To build the chains (i.e. basically the graph of values), we are going to
+ * iterate on liveout definitions: for each block and for each variable
+ * (ir::Register) alive at the end of the block (in Block::LiveOut), we are
+ * computing the set of all possible value definitions. Using these value
+ * definitions, we will finally transfer these sets to the successors to get
+ * the ud / du chains
+ *
+ * LiveOutSet contains the set of definitions for each basic block
+ */
+ class LiveOutSet
+ {
+ public:
+ LiveOutSet(Liveness &liveness, const FunctionDAG &dag);
+ ~LiveOutSet(void);
+ /*! One set per register */
+ typedef set<ValueDef*> RegDefSet;
+ /*! We have one map of liveout register per block */
+ typedef map<Register, RegDefSet*> BlockDefMap;
+ /*! All the block definitions map in the functions */
+ typedef map<const BasicBlock*, BlockDefMap*> FunctionDefMap;
+ /*! Performs the double look-up to get the set of defs per register */
+ RegDefSet &getDefSet(const BasicBlock *bb, Register reg);
+ /*! Build a UD-chain as the union of the predecessor chains */
+ void makeDefSet(DefSet &udChain, const BasicBlock &bb, Register reg);
+ /*! Fast per register definition set allocation */
+ DECL_POOL(RegDefSet, regDefSetPool);
+ /*! Fast register sets allocation */
+ DECL_POOL(BlockDefMap, blockDefMapPool);
+ FunctionDefMap defMap; //!< All per-block data
+ Liveness &liveness; //!< Contains LiveOut information
+ const FunctionDAG &dag; //!< Structure we are building
+ private:
+ /*! Initialize liveOut with the instruction destination values */
+ void initializeInstructionDef(void);
+    /*! Initialize liveOut with the function arguments, special and pushed
+ * registers
+ */
+ void initializeOtherDef(void);
+ /*! Iterate to completely transfer the liveness and get the def sets */
+ void iterateLiveOut(void);
+ /*! Use custom allocators */
+ GBE_CLASS(LiveOutSet);
+ };
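+
+  /* Rough sketch of what the iteration computes: take a diamond CFG where B1
+   * branches to B2 and B3, which both fall through to B4, and a register %r
+   * that is live-out of every block. If B1 and B3 define %r while B2 and B4
+   * do not, then after the fixed point:
+   *   defSet(B1, %r) = { def@B1 }
+   *   defSet(B2, %r) = { def@B1 }          (transferred from B1)
+   *   defSet(B3, %r) = { def@B3 }          (B3 kills %r, so B1's def is not
+   *                                         transferred)
+   *   defSet(B4, %r) = { def@B1, def@B3 }
+   */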
+
+ /*! Debug print of the liveout set */
+ std::ostream &operator<< (std::ostream &out, LiveOutSet &set);
+
+ LiveOutSet::LiveOutSet(Liveness &liveness, const FunctionDAG &dag) :
+ liveness(liveness), dag(dag)
+ {
+ this->initializeInstructionDef();
+ this->initializeOtherDef();
+ this->iterateLiveOut();
+ }
+
+ LiveOutSet::RegDefSet &LiveOutSet::getDefSet(const BasicBlock *bb, Register reg)
+ {
+ auto bbIt = defMap.find(bb);
+ GBE_ASSERT(bbIt != defMap.end());
+ auto defIt = bbIt->second->find(reg);
+ GBE_ASSERT(defIt != bbIt->second->end() && defIt->second != NULL);
+ return *defIt->second;
+ }
+
+ void LiveOutSet::makeDefSet(DefSet &udChain, const BasicBlock &bb, Register reg)
+ {
+ // Iterate over all the predecessors
+ const auto &preds = bb.getPredecessorSet();
+ for (const auto &pred : preds) {
+ RegDefSet &predDef = this->getDefSet(pred, reg);
+ for (auto def : predDef) udChain.insert(def);
+ }
+
+ // If this is the top block we must take into account both function
+ // arguments and special registers
+ const Function &fn = bb.getParent();
+ if (fn.isEntryBlock(bb) == false) return;
+
+ // Is it a function input?
+ const FunctionArgument *arg = fn.getArg(reg);
+ const PushLocation *pushed = fn.getPushLocation(reg);
+
+ // Is it a pushed register?
+ if (pushed != NULL) {
+ ValueDef *def = const_cast<ValueDef*>(dag.getDefAddress(pushed));
+ udChain.insert(def);
+ }
+    // Is it a function argument?
+ else if (arg != NULL) {
+ ValueDef *def = const_cast<ValueDef*>(dag.getDefAddress(arg));
+ udChain.insert(def);
+ }
+ // Is it a special register?
+ else if (fn.isSpecialReg(reg) == true) {
+ ValueDef *def = const_cast<ValueDef*>(dag.getDefAddress(reg));
+ udChain.insert(def);
+ }
+ }
+
+ void LiveOutSet::initializeInstructionDef(void) {
+ const Function &fn = liveness.getFunction();
+
+ // Iterate over each block and initialize the liveOut data
+ fn.foreachBlock([&](const BasicBlock &bb) {
+ GBE_ASSERT(defMap.find(&bb) == defMap.end());
+
+ // Allocate a map of register definitions
+ auto blockDefMap = this->newBlockDefMap();
+ defMap.insert(std::make_pair(&bb, blockDefMap));
+
+ // We only consider liveout registers
+ const auto &info = this->liveness.getBlockInfo(&bb);
+ const auto &liveOut = info.liveOut;
+ for (auto reg : liveOut) {
+ GBE_ASSERT(blockDefMap->find(reg) == blockDefMap->end());
+ auto regDefSet = this->newRegDefSet();
+ blockDefMap->insert(std::make_pair(reg, regDefSet));
+ }
+
+ // Now traverse the blocks backwards and find the definition of each
+ // liveOut register
+ set<Register> defined;
+ for (auto it = --bb.end(); it != bb.end(); --it) {
+ const Instruction &insn = *it;
+ const uint32_t dstNum = insn.getDstNum();
+ for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
+ const Register reg = insn.getDst(dstID);
+ // We only take the most recent definition
+ if (defined.contains(reg) == true) continue;
+ // Not in LiveOut, so does not matter
+ if (info.inLiveOut(reg) == false) continue;
+ defined.insert(reg);
+ // Insert the outgoing definition for this register
+ auto regDefSet = blockDefMap->find(reg);
+ ValueDef *def = const_cast<ValueDef*>(this->dag.getDefAddress(&insn, dstID));
+ GBE_ASSERT(regDefSet != blockDefMap->end() && def != NULL);
+ regDefSet->second->insert(def);
+ }
+ }
+ });
+ }
+
+ void LiveOutSet::initializeOtherDef(void) {
+ const Function &fn = liveness.getFunction();
+ const uint32_t argNum = fn.argNum();
+
+ // The first block must also transfer the function arguments
+ const BasicBlock &top = fn.getTopBlock();
+ const Liveness::BlockInfo &info = this->liveness.getBlockInfo(&top);
+ GBE_ASSERT(defMap.contains(&top) == true);
+ auto blockDefMap = defMap.find(&top)->second;
+
+ // Insert all the values that are not overwritten in the block and alive at
+ // the end of it
+ for (uint32_t argID = 0; argID < argNum; ++argID) {
+ const FunctionArgument &arg = fn.getArg(argID);
+ const Register reg = arg.reg;
+ // Do not transfer dead values
+ if (info.inLiveOut(reg) == false) continue;
+ // If we overwrite it, do not transfer the initial value
+ if (info.inVarKill(reg) == true) continue;
+ ValueDef *def = const_cast<ValueDef*>(this->dag.getDefAddress(&arg));
+ auto it = blockDefMap->find(reg);
+ GBE_ASSERT(it != blockDefMap->end());
+ it->second->insert(def);
+ }
+
+ // Now transfer the special registers that are not over-written
+ const uint32_t firstID = fn.getFirstSpecialReg();
+ const uint32_t specialNum = fn.getSpecialRegNum();
+ for (uint32_t regID = firstID; regID < firstID + specialNum; ++regID) {
+ const Register reg(regID);
+ // Do not transfer dead values
+ if (info.inLiveOut(reg) == false) continue;
+ // If we overwrite it, do not transfer the initial value
+ if (info.inVarKill(reg) == true) continue;
+ ValueDef *def = const_cast<ValueDef*>(this->dag.getDefAddress(reg));
+ auto it = blockDefMap->find(reg);
+ GBE_ASSERT(it != blockDefMap->end());
+ it->second->insert(def);
+ }
+
+ // Finally do the same thing with pushed registers
+ const Function::PushMap &pushMap = fn.getPushMap();
+ for (const auto &pushed : pushMap) {
+ const Register reg = pushed.first;
+ // Do not transfer dead values
+ if (info.inLiveOut(reg) == false) continue;
+ // If we overwrite it, do not transfer the initial value
+ if (info.inVarKill(reg) == true) continue;
+ ValueDef *def = const_cast<ValueDef*>(this->dag.getDefAddress(&pushed.second));
+ auto it = blockDefMap->find(reg);
+ GBE_ASSERT(it != blockDefMap->end());
+ it->second->insert(def);
+ }
+ }
+
+ void LiveOutSet::iterateLiveOut(void) {
+ bool changed = true;
+
+ while (changed) {
+ changed = false;
+
+ // Compute the union of the current liveout definitions with the previous
+ // ones. Do not take into account the killed values though
+ liveness.foreach<DF_PRED>([&](Liveness::BlockInfo &curr,
+ const Liveness::BlockInfo &pred)
+ {
+ const BasicBlock &bb = curr.bb;
+ const BasicBlock &pbb = pred.bb;
+ for (auto reg : curr.liveOut) {
+ if (pred.inLiveOut(reg) == false) continue;
+ if (curr.inVarKill(reg) == true) continue;
+ RegDefSet &currSet = this->getDefSet(&bb, reg);
+ RegDefSet &predSet = this->getDefSet(&pbb, reg);
+
+ // Transfer the values
+ for (auto def : predSet) {
+ if (currSet.contains(def)) continue;
+ changed = true;
+ currSet.insert(def);
+ }
+ }
+ });
+ }
+ }
+
+ LiveOutSet::~LiveOutSet(void) {
+ for (const auto pair : defMap) {
+ BlockDefMap *block = pair.second;
+ for (auto regSet : *block)
+ this->deleteRegDefSet(regSet.second);
+ this->deleteBlockDefMap(block);
+ }
+ }
+
+ std::ostream &operator<< (std::ostream &out, LiveOutSet &set) {
+ for (const auto &pair : set.defMap) {
+ // To recognize the block, just print its instructions
+ out << "Block:" << std::endl;
+ for (const auto &insn : *pair.first) out << insn << std::endl;
+
+ // Iterate over all alive registers to get their definitions
+ const LiveOutSet::BlockDefMap *defMap = pair.second;
+ if (defMap->size() > 0) out << "LiveSet:" << std::endl;
+ for (const auto &pair : *defMap) {
+ const Register reg = pair.first;
+ const LiveOutSet::RegDefSet *set = pair.second;
+ for (auto def : *set) {
+ const ValueDef::Type type = def->getType();
+ if (type == ValueDef::DEF_FN_ARG)
+ out << "%" << reg << ": " << "function input" << std::endl;
+ else if (type == ValueDef::DEF_FN_PUSHED)
+ out << "%" << reg << ": " << "pushed register" << std::endl;
+ else if (type == ValueDef::DEF_SPECIAL_REG)
+ out << "%" << reg << ": " << "special register" << std::endl;
+ else {
+ const Instruction *insn = def->getInstruction();
+ out << "%" << reg << ": " << insn << " " << *insn << std::endl;
+ }
+ }
+ }
+ out << std::endl;
+ }
+ return out;
+ }
+
+ FunctionDAG::FunctionDAG(Liveness &liveness) :
+ fn(liveness.getFunction())
+ {
+ // We first start with empty chains
+ udEmpty = this->newDefSet();
+ duEmpty = this->newUseSet();
+
+ // First create the chains and insert them in their respective maps
+ fn.foreachInstruction([this](const Instruction &insn) {
+ // sources == value uses
+ const uint32_t srcNum = insn.getSrcNum();
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+ ValueUse *valueUse = this->newValueUse(&insn, srcID);
+ useName.insert(std::make_pair(*valueUse, valueUse));
+ udGraph.insert(std::make_pair(*valueUse, udEmpty));
+ }
+ // destinations == value defs
+ const uint32_t dstNum = insn.getDstNum();
+ for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
+ ValueDef *valueDef = this->newValueDef(&insn, dstID);
+ defName.insert(std::make_pair(*valueDef, valueDef));
+ duGraph.insert(std::make_pair(*valueDef, duEmpty));
+ }
+ });
+
+ // Function arguments are also value definitions
+ const uint32_t argNum = fn.argNum();
+ for (uint32_t argID = 0; argID < argNum; ++argID) {
+ const FunctionArgument &arg = fn.getArg(argID);
+ ValueDef *valueDef = this->newValueDef(&arg);
+ defName.insert(std::make_pair(*valueDef, valueDef));
+ duGraph.insert(std::make_pair(*valueDef, duEmpty));
+ }
+
+ // Special registers are also definitions
+ const uint32_t firstID = fn.getFirstSpecialReg();
+ const uint32_t specialNum = fn.getSpecialRegNum();
+ for (uint32_t regID = firstID; regID < firstID + specialNum; ++regID) {
+ const Register reg(regID);
+ ValueDef *valueDef = this->newValueDef(reg);
+ defName.insert(std::make_pair(*valueDef, valueDef));
+ duGraph.insert(std::make_pair(*valueDef, duEmpty));
+ }
+
+ // Pushed registers are also definitions
+ const Function::PushMap &pushMap = fn.getPushMap();
+ for (const auto &pushed : pushMap) {
+ ValueDef *valueDef = this->newValueDef(&pushed.second);
+ defName.insert(std::make_pair(*valueDef, valueDef));
+ duGraph.insert(std::make_pair(*valueDef, duEmpty));
+ }
+
+ // We create the liveOutSet to help us transfer the definitions
+ LiveOutSet liveOutSet(liveness, *this);
+
+ // Build UD chains traversing the blocks top to bottom
+ fn.foreachBlock([&](const BasicBlock &bb) {
+ // Track the allocated chains to be able to reuse them
+ map<Register, DefSet*> allocated;
+      // Some chains may not be used (i.e. they are dead). We track them to be
+ // able to deallocate them later
+ set<DefSet*> unused;
+
+ // For each instruction build the UD chains
+ const_cast<BasicBlock&>(bb).foreach([&](const Instruction &insn) {
+        // Instruction sources consume definitions
+ const uint32_t srcNum = insn.getSrcNum();
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+ const Register src = insn.getSrc(srcID);
+ const ValueUse use(&insn, srcID);
+ auto ud = udGraph.find(use);
+ GBE_ASSERT(ud != udGraph.end());
+
+        // We already allocated the ud chain for this register
+ auto it = allocated.find(src);
+ if (it != allocated.end()) {
+ udGraph.erase(ud);
+ udGraph.insert(std::make_pair(use, it->second));
+ if (unused.contains(it->second))
+ unused.erase(it->second);
+ }
+ // Create a new one from the predecessor chains (upward used value)
+ else {
+ DefSet *udChain = this->newDefSet();
+ liveOutSet.makeDefSet(*udChain, bb, src);
+ allocated.insert(std::make_pair(src, udChain));
+ ud->second = udChain;
+ }
+ }
+
+ // Instruction destinations create new chains
+ const uint32_t dstNum = insn.getDstNum();
+ for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
+ const Register dst = insn.getDst(dstID);
+ ValueDef *def = const_cast<ValueDef*>(this->getDefAddress(&insn, dstID));
+ DefSet *udChain = this->newDefSet();
+ udChain->insert(def);
+ unused.insert(udChain);
+ // Remove the previous definition if any
+ if (allocated.contains(dst) == true)
+ allocated.erase(dst);
+ allocated.insert(std::make_pair(dst, udChain));
+ }
+ });
+
+ // Deallocate unused chains
+ for (auto set : unused) this->deleteDefSet(set);
+ });
+
+ // Build the DU chains from the UD ones
+ fn.foreachInstruction([&](const Instruction &insn) {
+
+ // For each value definition of each source, we push back this use
+ const uint32_t srcNum = insn.getSrcNum();
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+ ValueUse *use = const_cast<ValueUse*>(getUseAddress(&insn, srcID));
+
+ // Find all definitions for this source
+ const auto &defs = this->getDef(&insn, srcID);
+ for (auto def : defs) {
+ auto uses = duGraph.find(*def);
+          GBE_ASSERT(uses != duGraph.end());
+          UseSet *du = uses->second;
+ if (du == duEmpty) {
+ duGraph.erase(*def);
+ du = this->newUseSet();
+ duGraph.insert(std::make_pair(*def, du));
+ }
+ du->insert(use);
+ }
+ }
+ });
+
+ // Allocate the set of uses and defs per register
+ const uint32_t regNum = fn.regNum();
+ for (uint32_t regID = 0; regID < regNum; ++regID) {
+ const Register reg(regID);
+ UseSet *useSet = GBE_NEW_NO_ARG(UseSet);
+ DefSet *defSet = GBE_NEW_NO_ARG(DefSet);
+ regUse.insert(std::make_pair(reg, useSet));
+ regDef.insert(std::make_pair(reg, defSet));
+ }
+
+ // Fill use sets (one per register)
+ for (auto &useSet : duGraph) {
+ for (auto use : *useSet.second) {
+ const Register reg = use->getRegister();
+ auto it = regUse.find(reg);
+ GBE_ASSERT(it != regUse.end() && it->second != NULL);
+ it->second->insert(use);
+ }
+ }
+
+ // Fill def sets (one per register)
+ for (auto &defSet : udGraph) {
+ for (auto def : *defSet.second) {
+ const Register reg = def->getRegister();
+ auto it = regDef.find(reg);
+ GBE_ASSERT(it != regDef.end() && it->second != NULL);
+ it->second->insert(def);
+ }
+ }
+ }
+
+/*! Helper to deallocate objects */
+#define PTR_RELEASE(TYPE, VAR) \
+ do { \
+ if (VAR && destroyed.contains(VAR) == false) { \
+ destroyed.insert(VAR); \
+ delete##TYPE(VAR); \
+ } \
+ } while (0)
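+
+/* For instance PTR_RELEASE(DefSet, defs) expands to a guarded call to
+ * deleteDefSet(defs): the token pasting in delete##TYPE selects the pool
+ * deallocator declared by DECL_POOL, and the "destroyed" set makes sure each
+ * pointer is released at most once even if it appears in several chains.
+ */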
+
+ FunctionDAG::~FunctionDAG(void) {
+
+ // We track the already destroyed pointers
+ set<void*> destroyed;
+
+ // Release the empty ud-chains and du-chains
+ PTR_RELEASE(DefSet, udEmpty);
+ PTR_RELEASE(UseSet, duEmpty);
+
+ // We free all the ud-chains
+ for (const auto &pair : udGraph) {
+ auto defs = pair.second;
+ if (destroyed.contains(defs)) continue;
+ for (auto def : *defs) PTR_RELEASE(ValueDef, def);
+ PTR_RELEASE(DefSet, defs);
+ }
+
+ // We free all the du-chains
+ for (const auto &pair : duGraph) {
+ auto uses = pair.second;
+ if (destroyed.contains(uses)) continue;
+ for (auto use : *uses) PTR_RELEASE(ValueUse, use);
+ PTR_RELEASE(UseSet, uses);
+ }
+
+ // Release all the use and definition sets per register
+ for (const auto &pair : regUse) GBE_SAFE_DELETE(pair.second);
+ for (const auto &pair : regDef) GBE_SAFE_DELETE(pair.second);
+ }
+#undef PTR_RELEASE
+
+ const UseSet &FunctionDAG::getUse(const ValueDef &def) const {
+ auto it = duGraph.find(def);
+ GBE_ASSERT(it != duGraph.end());
+ return *it->second;
+ }
+ const UseSet &FunctionDAG::getUse(const Instruction *insn, uint32_t dstID) const {
+ return this->getUse(ValueDef(insn, dstID));
+ }
+ const UseSet &FunctionDAG::getUse(const FunctionArgument *arg) const {
+ return this->getUse(ValueDef(arg));
+ }
+ const UseSet &FunctionDAG::getUse(const Register ®) const {
+ return this->getUse(ValueDef(reg));
+ }
+ const DefSet &FunctionDAG::getDef(const ValueUse &use) const {
+ auto it = udGraph.find(use);
+ GBE_ASSERT(it != udGraph.end());
+ return *it->second;
+ }
+ const DefSet &FunctionDAG::getDef(const Instruction *insn, uint32_t srcID) const {
+ return this->getDef(ValueUse(insn, srcID));
+ }
+ const ValueDef *FunctionDAG::getDefAddress(const ValueDef &def) const {
+ auto it = defName.find(def);
+ GBE_ASSERT(it != defName.end() && it->second != NULL);
+ return it->second;
+ }
+ const ValueDef *FunctionDAG::getDefAddress(const PushLocation *pushed) const {
+ return this->getDefAddress(ValueDef(pushed));
+ }
+ const ValueDef *FunctionDAG::getDefAddress(const Instruction *insn, uint32_t dstID) const {
+ return this->getDefAddress(ValueDef(insn, dstID));
+ }
+ const ValueDef *FunctionDAG::getDefAddress(const FunctionArgument *arg) const {
+ return this->getDefAddress(ValueDef(arg));
+ }
+ const ValueDef *FunctionDAG::getDefAddress(const Register ®) const {
+ return this->getDefAddress(ValueDef(reg));
+ }
+ const ValueUse *FunctionDAG::getUseAddress(const Instruction *insn, uint32_t srcID) const {
+ const ValueUse use(insn, srcID);
+ auto it = useName.find(use);
+ GBE_ASSERT(it != useName.end() && it->second != NULL);
+ return it->second;
+ }
+
+ std::ostream &operator<< (std::ostream &out, const FunctionDAG &dag) {
+ const Function &fn = dag.getFunction();
+
+ // Print all uses for the definitions and all definitions for each uses
+ fn.foreachInstruction([&](const Instruction &insn) {
+ out << &insn << ": " << insn << std::endl;
+
+ // Display the set of definition for each destination
+ const uint32_t dstNum = insn.getDstNum();
+ if (dstNum > 0) out << "USES:" << std::endl;
+ for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
+ const Register reg = insn.getDst(dstID);
+ const auto &uses = dag.getUse(&insn, dstID);
+ for (auto use : uses) {
+ const Instruction *other = use->getInstruction();
+ out << " %" << reg << " " << other << ": " << *other << std::endl;
+ }
+ }
+
+ // Display the set of definitions for each source
+ const uint32_t srcNum = insn.getSrcNum();
+ if (srcNum > 0) out << "DEFS:" << std::endl;
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+ const Register reg = insn.getSrc(srcID);
+ const auto &defs = dag.getDef(&insn, srcID);
+ for (auto def : defs) {
+ if (def->getType() == ValueDef::DEF_FN_PUSHED)
+ out << " %" << reg << " # pushed register" << std::endl;
+ else if (def->getType() == ValueDef::DEF_FN_ARG)
+ out << " %" << reg << " # function argument" << std::endl;
+ else if (def->getType() == ValueDef::DEF_SPECIAL_REG)
+ out << " %" << reg << " # special register" << std::endl;
+ else {
+ const Instruction *other = def->getInstruction();
+ out << " %" << reg << " " << other << ": " << *other << std::endl;
+ }
+ }
+ }
+ out << std::endl;
+ });
+
+ return out;
+ }
+
+} /* namespace ir */
+} /* namespace gbe */
+
diff --git a/backend/src/ir/value.hpp b/backend/src/ir/value.hpp
new file mode 100644
index 0000000..47b9048
--- /dev/null
+++ b/backend/src/ir/value.hpp
@@ -0,0 +1,266 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file value.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#ifndef __GBE_IR_VALUE_HPP__
+#define __GBE_IR_VALUE_HPP__
+
+#include "ir/instruction.hpp"
+#include "ir/function.hpp"
+#include "sys/set.hpp"
+#include "sys/map.hpp"
+
+namespace gbe {
+namespace ir {
+
+ // Make UD-Chain and DU-Chain computations faster and easier
+ class Liveness;
+
+ /*! A value definition is a destination of an instruction or a function
+ * argument. Since we support multiple destinations, we also add the
+ * destination ID.
+ */
+ class ValueDef
+ {
+ public:
+ /*! Discriminates the kind of values */
+ enum Type : uint32_t {
+ DEF_FN_ARG = 0,
+ DEF_FN_PUSHED = 1,
+ DEF_INSN_DST = 2,
+ DEF_SPECIAL_REG = 3
+ };
+ /*! Build a value from an instruction destination */
+ explicit ValueDef(const Instruction *insn, uint32_t dstID = 0u) :
+ type(DEF_INSN_DST)
+ {
+ this->data.insn = insn;
+ this->data.dstID = dstID;
+ }
+ /*! Build a value from a function argument */
+ explicit ValueDef(const FunctionArgument *arg) : type(DEF_FN_ARG) {
+ this->data.arg = arg;
+ }
+ /*! Build a value from a pushed register */
+ explicit ValueDef(const PushLocation *pushed) : type(DEF_FN_PUSHED) {
+ this->data.pushed = pushed;
+ }
+ /*! Build a value from a special register */
+ explicit ValueDef(const Register ®) : type(DEF_SPECIAL_REG) {
+ this->data.regID = uint32_t(reg);
+ }
+ /*! Get the type of the value */
+ INLINE Type getType(void) const { return type; }
+    /*! Get the instruction (only if this is an instruction value) */
+ INLINE const Instruction *getInstruction(void) const {
+ GBE_ASSERT(type == DEF_INSN_DST);
+ return data.insn;
+ }
+    /*! Get the destination ID (only if this is an instruction value) */
+ INLINE uint32_t getDstID(void) const {
+ GBE_ASSERT(type == DEF_INSN_DST);
+ return data.dstID;
+ }
+ /*! Get the function input (only if this is a function argument) */
+ INLINE const FunctionArgument *getFunctionArgument(void) const {
+ GBE_ASSERT(type == DEF_FN_ARG);
+ return data.arg;
+ }
+ /*! Get the pushed location */
+ INLINE const PushLocation *getPushLocation(void) const {
+ GBE_ASSERT(type == DEF_FN_PUSHED);
+ return data.pushed;
+ }
+ /*! Get the special register */
+ INLINE Register getSpecialReg(void) const {
+ GBE_ASSERT(type == DEF_SPECIAL_REG);
+ return Register(data.regID);
+ }
+ /*! Retrieve the register associated to the definition */
+ INLINE Register getRegister(void) const {
+ if (type == DEF_SPECIAL_REG)
+ return Register(data.regID);
+ else if (type == DEF_FN_ARG)
+ return data.arg->reg;
+ else if (type == DEF_FN_PUSHED)
+ return data.pushed->getRegister();
+ else
+ return data.insn->getDst(data.dstID);
+ }
+
+ private:
+ /*! Instruction or function argument */
+ union Data {
+ /*! Instruction destination or ... */
+ struct {
+        const Instruction *insn; //!< Instruction itself
+        uint32_t dstID; //!< Which destination we take into account
+ };
+ /*! Pushed value */
+ const PushLocation *pushed;
+ /*! ... function argument or ... */
+ const FunctionArgument *arg;
+ /*! ... special register */
+ uint32_t regID;
+ } data;
+    /*! Function argument or instruction dst? */
+ Type type;
+ GBE_CLASS(ValueDef); // Use gbe allocators
+ };
+
+ /*! Compare two value definitions (used in maps) */
+ INLINE bool operator< (const ValueDef &def0, const ValueDef &def1) {
+ const ValueDef::Type type0 = def0.getType();
+ const ValueDef::Type type1 = def1.getType();
+ if (type0 != type1) return uint32_t(type0) < uint32_t(type1);
+ if (type0 == ValueDef::DEF_FN_ARG) {
+ const FunctionArgument *in0 = def0.getFunctionArgument();
+ const FunctionArgument *in1 = def1.getFunctionArgument();
+ return uintptr_t(in0) < uintptr_t(in1);
+ } else if (type0 == ValueDef::DEF_FN_PUSHED) {
+ const PushLocation *pushed0 = def0.getPushLocation();
+ const PushLocation *pushed1 = def1.getPushLocation();
+ return uintptr_t(pushed0) < uintptr_t(pushed1);
+ } else if (type0 == ValueDef::DEF_SPECIAL_REG) {
+ const Register reg0 = def0.getSpecialReg();
+ const Register reg1 = def1.getSpecialReg();
+ return uint32_t(reg0) < uint32_t(reg1);
+ } else {
+ const Instruction *insn0 = def0.getInstruction();
+ const Instruction *insn1 = def1.getInstruction();
+ if (insn0 != insn1) return uintptr_t(insn0) < uintptr_t(insn1);
+ const uint32_t dst0 = def0.getDstID();
+ const uint32_t dst1 = def1.getDstID();
+ return dst0 < dst1;
+ }
+ }
+
+  /*! A value use describes an instruction source. This is the place where a
+ * value is used
+ */
+ class ValueUse
+ {
+ public:
+ /*! Build a value use */
+ explicit ValueUse(const Instruction *insn, uint32_t srcID = 0u) :
+ insn(insn), srcID(srcID) {}
+ /*! Get the instruction of the use */
+ const Instruction *getInstruction(void) const { return insn; }
+ /*! Get the source index for this use */
+ uint32_t getSrcID(void) const { return srcID; }
+ /*! Get the register for this use */
+ Register getRegister(void) const { return insn->getSrc(srcID); }
+ private:
+ const Instruction *insn; //!< Instruction where the value is used
+ uint32_t srcID; //!< Index of the source in the instruction
+ GBE_CLASS(ValueUse); // Use gbe allocators
+ };
+
+ /*! Compare two value uses (used in maps) */
+ INLINE bool operator< (const ValueUse &use0, const ValueUse &use1) {
+ const Instruction *insn0 = use0.getInstruction();
+ const Instruction *insn1 = use1.getInstruction();
+ if (insn0 != insn1) return uintptr_t(insn0) < uintptr_t(insn1);
+ const uint32_t src0 = use0.getSrcID();
+ const uint32_t src1 = use1.getSrcID();
+ return src0 < src1;
+ }
+
+ /*! All uses of a definition */
+ typedef set<ValueUse*> UseSet;
+ /*! All possible definitions for a use */
+ typedef set<ValueDef*> DefSet;
+
+ /*! Get the chains (in both directions) for the complete program. This data
+ * structure is unfortunately way too brutal. Using std::sets all over the
+ * place just burns a huge amount of memory. There is work to do to decrease
+ * the memory footprint
+ */
+ class FunctionDAG : public NonCopyable
+ {
+ public:
+ /*! Build the complete DU/UD graphs for the program included in liveness */
+ FunctionDAG(Liveness &liveness);
+ /*! Free all the resources */
+ ~FunctionDAG(void);
+ /*! Get the du-chain for the definition */
+ const UseSet &getUse(const ValueDef &def) const;
+ /*! Get the du-chain for the given instruction and destination */
+ const UseSet &getUse(const Instruction *insn, uint32_t dstID) const;
+ /*! Get the du-chain for the given function input */
+ const UseSet &getUse(const FunctionArgument *arg) const;
+ /*! Get the du-chain for the given pushed location */
+ const UseSet &getUse(const PushLocation *pushed) const;
+ /*! Get the du-chain for the given special register */
+ const UseSet &getUse(const Register ®) const;
+ /*! Get the ud-chain for the given use */
+ const DefSet &getDef(const ValueUse &use) const;
+ /*! Get the ud-chain for the instruction and source */
+ const DefSet &getDef(const Instruction *insn, uint32_t srcID) const;
+ /*! Get the pointer to the definition *as stored in the DAG* */
+ const ValueDef *getDefAddress(const ValueDef &def) const;
+ /*! Get the pointer to the definition *as stored in the DAG* */
+ const ValueDef *getDefAddress(const PushLocation *pushed) const;
+ /*! Get the pointer to the definition *as stored in the DAG* */
+ const ValueDef *getDefAddress(const Instruction *insn, uint32_t dstID) const;
+ /*! Get the pointer to the definition *as stored in the DAG* */
+ const ValueDef *getDefAddress(const FunctionArgument *input) const;
+ /*! Get the pointer to the definition *as stored in the DAG* */
+ const ValueDef *getDefAddress(const Register ®) const;
+ /*! Get the pointer to the use *as stored in the DAG* */
+ const ValueUse *getUseAddress(const Instruction *insn, uint32_t srcID) const;
+ /*! Get the set of all uses for the register */
+ const UseSet *getRegUse(const Register ®) const;
+ /*! Get the set of all definitions for the register */
+ const DefSet *getRegDef(const Register ®) const;
+ /*! Get the function we have the graph for */
+ INLINE const Function &getFunction(void) const { return fn; }
+ /*! The DefSet for each definition use */
+ typedef map<ValueUse, DefSet*> UDGraph;
+ /*! The UseSet for each definition */
+ typedef map<ValueDef, UseSet*> DUGraph;
+ private:
+ UDGraph udGraph; //!< All the UD chains
+ DUGraph duGraph; //!< All the DU chains
+ DefSet *udEmpty; //!< Void use set
+ UseSet *duEmpty; //!< Void def set
+ ValueDef *undefined; //!< Undefined value
+ map<ValueUse, ValueUse*> useName; //!< Get the ValueUse pointer from the value
+ map<ValueDef, ValueDef*> defName; //!< Get the ValueDef pointer from the value
+ map<Register, UseSet*> regUse; //!< All uses of registers
+ map<Register, DefSet*> regDef; //!< All defs of registers
+ DECL_POOL(ValueDef, valueDefPool); //!< Fast ValueDef allocation
+ DECL_POOL(ValueUse, valueUsePool); //!< Fast ValueUse allocation
+ DECL_POOL(DefSet, udChainPool); //!< Fast DefSet allocation
+ DECL_POOL(UseSet, duChainPool); //!< Fast UseSet allocation
+ const Function &fn; //!< Function we are referring to
+ GBE_CLASS(FunctionDAG); // Use internal allocators
+ };
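+
+  /* A rough query sketch (names are illustrative): for an instruction "insn"
+   * with one source, dag.getDef(&insn, 0) returns every ValueDef that may
+   * reach that source (its ud-chain); conversely, for each such definition d,
+   * dag.getUse(*d) contains a ValueUse pointing back to (&insn, 0), since the
+   * du-chains are built by inverting the ud-chains.
+   */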
+
+ /*! Pretty print of the function DAG */
+ std::ostream &operator<< (std::ostream &out, const FunctionDAG &dag);
+
+} /* namespace ir */
+} /* namespace gbe */
+
+#endif /* __GBE_IR_VALUE_HPP__ */
+
diff --git a/backend/src/llvm/CMakeLists.txt b/backend/src/llvm/CMakeLists.txt
new file mode 100644
index 0000000..b99aa6c
--- /dev/null
+++ b/backend/src/llvm/CMakeLists.txt
@@ -0,0 +1,19 @@
+include (${LLVM_DIR}/AddLLVM.cmake)
+include (${LLVM_DIR}/AddLLVMDefinitions.cmake)
+include (${LLVM_DIR}/HandleLLVMOptions.cmake)
+include (${LLVM_DIR}/LLVMProcessSources.cmake)
+include_directories(${LLVM_INCLUDE_DIRS})
+include_directories(../)
+add_llvm_target(GenBackend
+ llvm_to_gen.cpp
+ llvm_gen_backend.cpp
+ llvm_passes.cpp)
+target_link_libraries (LLVMGenBackend
+ LLVMSupport
+ LLVMAnalysis
+ LLVMCodeGen
+ LLVMCore
+ LLVMScalarOpts
+ LLVMTarget
+ LLVMTransformUtils)
+
diff --git a/backend/src/llvm/Makefile b/backend/src/llvm/Makefile
new file mode 100644
index 0000000..71f8a63
--- /dev/null
+++ b/backend/src/llvm/Makefile
@@ -0,0 +1,3 @@
+TOP=../..
+SUBDIRS=.
+include ../../Makefile.shared
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
new file mode 100644
index 0000000..4ab0372
--- /dev/null
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -0,0 +1,1980 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file llvm_gen_backend.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/* Transform the LLVM IR code into Gen IR code i.e. our temporary representation
+ * for programs running on Gen.
+ *
+ * Overview
+ * ========
+ *
+ * This code is mostly inspired by the CBackend (now defunct and replaced by
+ * CppBackend). Basically, there are two ways to transform LLVM code into
+ * machine code (or anything else):
+ * - You write a complete LLVM backend by the book. LLVM proposes a lot of
+ * useful tools to do so. This is obviously the path chosen by all CPU guys
+ * but also by AMD and nVidia which both use the backend infrastructure to
+ * output their own intermediate language. The good point is that you can
+ * reuse a lot of tools (like proper PHI elimination with phi congruence and
+ * global copy propagation a la Chaitin). Bad points are:
+ * 1/ It is a *long* journey to generate anything.
+ * 2/ More importantly, the code is hugely biased towards CPUs. Typically,
+ *       the way registers are defined does not fit the Gen register file well
+ *       (which is really more like a regular piece of memory). The same issue
+ *       applies to predicated instructions with a mask, which are awkward to
+ *       use with SSA. Indeed, since DAGSelection still manipulates SSA values,
+ *       anything predicated requires inserting extra sources
+ * - You write function passes to do the translation yourself. Obviously, you
+ * reinvent the wheel. However, it is easy to do and easier to maintain
+ * (somehow)
+ *
+ * So, the code here just traverses LLVM asm and generates our own ISA. The
+ * generated code is OK even if a global copy propagation pass is still overdue.
+ * Right now, it is pretty straightforward and simplistic in that regard
+ *
+ * About Clang and the ABI / target
+ * ================================
+ *
+ * A major question is: how did we actually generate this LLVM code from OpenCL?
+ * Well, the thing is that there is no generic target in LLVM since there are
+ * many dependencies on endianness or ABIs. Fortunately, the ptx (and nvptx for
+ * LLVM 3.2) profile is pretty well adapted to our needs since NV and Gen GPUs
+ * are kind of similar, or at least similar enough to share the same front
+ * end.
+ *
+ * Problems
+ * ========
+ *
+ * - Several things regarding constants like ConstantExpr are not properly handled.
+ * - ptx front end generates function calls. Since we do not support them yet,
+ * the user needs to force the inlining of all functions. If a function call
+ * is intercepted, we just abort
+ */
+
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/ConstantsScanner.h"
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#if !defined(LLVM_VERSION_MAJOR) || (LLVM_VERSION_MINOR == 1)
+#include "llvm/Target/TargetData.h"
+#endif
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Config/config.h"
+
+#include "llvm/llvm_gen_backend.hpp"
+#include "ir/context.hpp"
+#include "ir/unit.hpp"
+#include "ir/liveness.hpp"
+#include "sys/map.hpp"
+#include "sys/set.hpp"
+#include "sys/cvar.hpp"
+#include <algorithm>
+
+/* Not defined for LLVM 3.0 */
+#if !defined(LLVM_VERSION_MAJOR)
+#define LLVM_VERSION_MAJOR 3
+#endif /* !defined(LLVM_VERSION_MAJOR) */
+
+#if !defined(LLVM_VERSION_MINOR)
+#define LLVM_VERSION_MINOR 0
+#endif /* !defined(LLVM_VERSION_MINOR) */
+
+#if (LLVM_VERSION_MAJOR != 3) || (LLVM_VERSION_MINOR > 2)
+#error "Only LLVM 3.0 / 3.1 is supported"
+#endif /* (LLVM_VERSION_MAJOR != 3) || (LLVM_VERSION_MINOR > 2) */
+
+using namespace llvm;
+
+namespace gbe
+{
+ /*! Gen IR manipulates only scalar types */
+ static bool isScalarType(const Type *type)
+ {
+ return type->isFloatTy() ||
+ type->isIntegerTy() ||
+ type->isDoubleTy() ||
+ type->isPointerTy();
+ }
+
+ /*! LLVM IR Type to Gen IR type translation */
+ static ir::Type getType(const ir::Context &ctx, const Type *type)
+ {
+ GBE_ASSERT(isScalarType(type));
+ if (type->isFloatTy() == true)
+ return ir::TYPE_FLOAT;
+ if (type->isDoubleTy() == true)
+ return ir::TYPE_DOUBLE;
+ if (type->isPointerTy() == true) {
+ if (ctx.getPointerSize() == ir::POINTER_32_BITS)
+ return ir::TYPE_U32;
+ else
+ return ir::TYPE_U64;
+ }
+ GBE_ASSERT(type->isIntegerTy() == true);
+ if (type == Type::getInt1Ty(type->getContext()))
+ return ir::TYPE_BOOL;
+ if (type == Type::getInt8Ty(type->getContext()))
+ return ir::TYPE_S8;
+ if (type == Type::getInt16Ty(type->getContext()))
+ return ir::TYPE_S16;
+ if (type == Type::getInt32Ty(type->getContext()))
+ return ir::TYPE_S32;
+ if (type == Type::getInt64Ty(type->getContext()))
+ return ir::TYPE_S64;
+ GBE_ASSERT(0);
+ return ir::TYPE_S64;
+ }
+
+ /*! Type to register family translation */
+ static ir::RegisterFamily getFamily(const ir::Context &ctx, const Type *type)
+ {
+ GBE_ASSERT(isScalarType(type) == true);
+ if (type == Type::getInt1Ty(type->getContext()))
+ return ir::FAMILY_BOOL;
+ if (type == Type::getInt8Ty(type->getContext()))
+ return ir::FAMILY_BYTE;
+ if (type == Type::getInt16Ty(type->getContext()))
+ return ir::FAMILY_WORD;
+ if (type == Type::getInt32Ty(type->getContext()) || type->isFloatTy())
+ return ir::FAMILY_DWORD;
+ if (type == Type::getInt64Ty(type->getContext()) || type->isDoubleTy())
+ return ir::FAMILY_QWORD;
+ if (type->isPointerTy())
+ return ctx.getPointerFamily();
+ GBE_ASSERT(0);
+ return ir::FAMILY_BOOL;
+ }
+
+  /*! Get the number of elements to process, dealing either with a vector or a
+   *  scalar value
+ */
+ static ir::Type getVectorInfo(const ir::Context &ctx, Type *llvmType, Value *value, uint32_t &elemNum)
+ {
+ ir::Type type;
+ if (llvmType->isVectorTy() == true) {
+ VectorType *vectorType = cast<VectorType>(llvmType);
+ Type *elementType = vectorType->getElementType();
+ elemNum = vectorType->getNumElements();
+ type = getType(ctx, elementType);
+ } else {
+ elemNum = 1;
+ type = getType(ctx, llvmType);
+ }
+ return type;
+ }
+
+  /*! OCL to Gen-IR address space */
+ static INLINE ir::AddressSpace addressSpaceLLVMToGen(unsigned llvmMemSpace) {
+ switch (llvmMemSpace) {
+ case 0: return ir::MEM_PRIVATE;
+ case 1: return ir::MEM_GLOBAL;
+ case 2: return ir::MEM_CONSTANT;
+ case 4: return ir::MEM_LOCAL;
+ }
+ GBE_ASSERT(false);
+ return ir::MEM_GLOBAL;
+ }
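+
+  /* As a concrete example (assuming the ptx-style numbering used above by the
+   * OpenCL front end), a "__global float *" kernel argument reaches us as an
+   * LLVM pointer in address space 1 and thus maps to ir::MEM_GLOBAL, while a
+   * "__local" pointer (address space 4) maps to ir::MEM_LOCAL.
+   */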
+
+ /*! Handle the LLVM IR Value to Gen IR register translation. This has 2 roles:
+ * - Split the LLVM vector into several scalar values
+   * - Handle the transparent copies (bitcast or use of intrinsic functions
+   *   like get_local_id / get_global_id)
+ */
+ class RegisterTranslator
+ {
+ public:
+ RegisterTranslator(ir::Context &ctx) : ctx(ctx) {}
+
+ /*! Empty the maps */
+ void clear(void) {
+ valueMap.clear();
+ scalarMap.clear();
+ }
+ /*! Some values will not be allocated. For example, a bit-cast destination
+     *  like "%fake = bitcast %real", or a vector insertion, since we do not
+     *  have vectors in Gen-IR
+ */
+ void newValueProxy(Value *real,
+ Value *fake,
+ uint32_t realIndex = 0u,
+ uint32_t fakeIndex = 0u) {
+ const ValueIndex key(fake, fakeIndex);
+ const ValueIndex value(real, realIndex);
+ GBE_ASSERT(valueMap.find(key) == valueMap.end()); // Do not insert twice
+ valueMap[key] = value;
+ }
+ /*! Mostly used for the preallocated registers (lids, gids) */
+ void newScalarProxy(ir::Register reg, Value *value, uint32_t index = 0u) {
+ const ValueIndex key(value, index);
+ GBE_ASSERT(scalarMap.find(key) == scalarMap.end());
+ scalarMap[key] = reg;
+ }
+ /*! Allocate a new scalar register */
+ ir::Register newScalar(Value *value, Value *key = NULL, uint32_t index = 0u)
+ {
+ GBE_ASSERT(dyn_cast<Constant>(value) == NULL);
+ Type *type = value->getType();
+ auto typeID = type->getTypeID();
+ switch (typeID) {
+ case Type::IntegerTyID:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ case Type::PointerTyID:
+ GBE_ASSERT(index == 0);
+ return this->newScalar(value, key, type, index);
+ break;
+ case Type::VectorTyID:
+ {
+ auto vectorType = cast<VectorType>(type);
+ auto elementType = vectorType->getElementType();
+ auto elementTypeID = elementType->getTypeID();
+ if (elementTypeID != Type::IntegerTyID &&
+ elementTypeID != Type::FloatTyID &&
+ elementTypeID != Type::DoubleTyID)
+            GBE_ASSERTM(false, "Vectors of this element type are not supported");
+ return this->newScalar(value, key, elementType, index);
+ break;
+ }
+ default: NOT_SUPPORTED;
+ };
+ return ir::Register();
+ }
+ /*! Get the register from the given value at given index possibly iterating
+ * in the value map to get the final real register
+ */
+ ir::Register getScalar(Value *value, uint32_t index = 0u) {
+ auto end = valueMap.end();
+ for (;;) {
+ auto it = valueMap.find(std::make_pair(value, index));
+ if (it == end)
+ break;
+ else {
+ value = it->second.first;
+ index = it->second.second;
+ }
+ }
+ const auto key = std::make_pair(value, index);
+ GBE_ASSERT(scalarMap.find(key) != scalarMap.end());
+ return scalarMap[key];
+ }
+ /*! Insert a given register at given Value position */
+ void insertRegister(const ir::Register ®, Value *value, uint32_t index) {
+ const auto key = std::make_pair(value, index);
+ GBE_ASSERT(scalarMap.find(key) == scalarMap.end());
+ scalarMap[key] = reg;
+ }
+ /*! Says if the value exists. Otherwise, it is undefined */
+ bool valueExists(Value *value, uint32_t index) {
+ auto end = valueMap.end();
+ for (;;) {
+ auto it = valueMap.find(std::make_pair(value, index));
+ if (it == end)
+ break;
+ else {
+ value = it->second.first;
+ index = it->second.second;
+ }
+ }
+ const auto key = std::make_pair(value, index);
+ return scalarMap.find(key) != scalarMap.end();
+ }
+ private:
+ /*! This creates a scalar register for a Value (index is the vector index when
+ * the value is a vector of scalars)
+ */
+ ir::Register newScalar(Value *value, Value *key, Type *type, uint32_t index) {
+ const ir::RegisterFamily family = getFamily(ctx, type);
+ const ir::Register reg = ctx.reg(family);
+ key = key == NULL ? value : key;
+ this->insertRegister(reg, key, index);
+ return reg;
+ }
+ /*! Indices will be zero for scalar values */
+ typedef std::pair<Value*, uint32_t> ValueIndex;
+ /*! Map value to ir::Register */
+ map<ValueIndex, ir::Register> scalarMap;
+    /*! Map values to values when this is only a translation (e.g. bitcast) */
+ map<ValueIndex, ValueIndex> valueMap;
+ /*! Actually allocates the registers */
+ ir::Context &ctx;
+ };
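+
+  /* Sketch of the proxy mechanism described in the class comment above: a
+   * bitcast destination such as "%fake = bitcast %real" does not get
+   * registers of its own; a proxy newValueProxy(real, fake, i, i) can be
+   * recorded per element i instead, and getScalar(fake, i) then follows the
+   * proxy chain in valueMap until it reaches the register that was actually
+   * allocated for (real, i).
+   */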
+ /*! All intrinsic Gen functions */
+ enum OCLInstrinsic {
+#define DECL_LLVM_GEN_FUNCTION(ID, NAME) GEN_OCL_##ID,
+#include "llvm_gen_ocl_function.hxx"
+#undef DECL_LLVM_GEN_FUNCTION
+ };
+
+ /*! Build the hash map for OCL functions on Gen */
+ struct OCLIntrinsicMap {
+ /*! Build the intrinsic hash map */
+ OCLIntrinsicMap(void) {
+#define DECL_LLVM_GEN_FUNCTION(ID, NAME) \
+ map.insert(std::make_pair(#NAME, GEN_OCL_##ID));
+#include "llvm_gen_ocl_function.hxx"
+#undef DECL_LLVM_GEN_FUNCTION
+ }
+    /*! Map each intrinsic name to its GEN_OCL_* identifier */
+ hash_map<std::string, OCLInstrinsic> map;
+ };
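+
+  /* Every DECL_LLVM_GEN_FUNCTION(ID, NAME) entry of llvm_gen_ocl_function.hxx
+   * expands above into map.insert(std::make_pair("NAME", GEN_OCL_ID)) (with
+   * NAME stringified and ID pasted), so the whole name-to-intrinsic table is
+   * generated from that single .hxx file.
+   */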
+
+  /*! The OCL Gen intrinsic functions indexed by name (built before main) */
+ static const OCLIntrinsicMap instrinsicMap;
+
+ /*! Translate LLVM IR code to Gen IR code */
+ class GenWriter : public FunctionPass, public InstVisitor<GenWriter>
+ {
+ /*! Unit to compute */
+ ir::Unit &unit;
+ /*! Helper structure to compute the unit */
+ ir::Context ctx;
+ /*! Make the LLVM-to-Gen translation */
+ RegisterTranslator regTranslator;
+ /*! Map target basic block to its ir::LabelIndex */
+ map<const BasicBlock*, ir::LabelIndex> labelMap;
+ /*! Condition inversion can simplify branch code. We store here all the
+ * compare instructions we need to invert to decrease branch complexity
+ */
+ set<const Value*> conditionSet;
+ /*! We visit each function twice. Once to allocate the registers and once to
+ * emit the Gen IR instructions
+ */
+ enum Pass {
+ PASS_EMIT_REGISTERS = 0,
+ PASS_EMIT_INSTRUCTIONS = 1
+ } pass;
+
+ LoopInfo *LI;
+ const Module *TheModule;
+
+ public:
+ static char ID;
+ explicit GenWriter(ir::Unit &unit)
+ : FunctionPass(ID),
+ unit(unit),
+ ctx(unit),
+ regTranslator(ctx),
+ LI(0),
+ TheModule(0)
+ {
+ initializeLoopInfoPass(*PassRegistry::getPassRegistry());
+ pass = PASS_EMIT_REGISTERS;
+ }
+
+ virtual const char *getPassName() const { return "Gen Back-End"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.setPreservesAll();
+ }
+
+ virtual bool doInitialization(Module &M);
+
+ bool runOnFunction(Function &F) {
+ // Do not codegen any 'available_externally' functions at all, they have
+ // definitions outside the translation unit.
+ if (F.hasAvailableExternallyLinkage())
+ return false;
+
+ LI = &getAnalysis<LoopInfo>();
+
+ emitFunction(F);
+ return false;
+ }
+
+ virtual bool doFinalization(Module &M) { return false; }
+
+ /*! Emit the complete function code and declaration */
+ void emitFunction(Function &F);
+ /*! Handle input and output function parameters */
+ void emitFunctionPrototype(Function &F);
+ /*! Emit the code for a basic block */
+ void emitBasicBlock(BasicBlock *BB);
+ /*! Each block end may require to emit MOVs for further PHIs */
+ void emitMovForPHI(BasicBlock *curr, BasicBlock *succ);
+    /*! Allocate one or several registers (if vector) for the value */
+ INLINE void newRegister(Value *value, Value *key = NULL);
+ /*! Return a valid register from an operand (can use LOADI to make one) */
+ INLINE ir::Register getRegister(Value *value, uint32_t index = 0);
+ /*! Create a new immediate from a constant */
+ ir::ImmediateIndex newImmediate(Constant *CPV, uint32_t index = 0);
+    /*! Insert a new label index for the given basic block */
+ INLINE void newLabelIndex(const BasicBlock *bb);
+ /*! Inspect the terminator instruction and try to see if we should invert
+ * the value to simplify the code
+ */
+ INLINE void simplifyTerminator(BasicBlock *bb);
+ /*! Helper function to emit loads and stores */
+ template <bool isLoad, typename T> void emitLoadOrStore(T &I);
+ /*! Will try to remove MOVs due to PHI resolution */
+ void removeMOVs(const ir::Liveness &liveness, ir::Function &fn);
+    /*! Will try to remove redundant LOADIs in basic blocks */
+ void removeLOADIs(const ir::Liveness &liveness, ir::Function &fn);
+    /*! To avoid the lost-copy problem, we need two values per PHI. This
+     *  function creates a fake value for the copy (basically ptr+1)
+ */
+ INLINE Value *getPHICopy(Value *PHI);
+ // Currently supported instructions
+#define DECL_VISIT_FN(NAME, TYPE) \
+ void regAllocate##NAME(TYPE &I); \
+ void emit##NAME(TYPE &I); \
+ void visit##NAME(TYPE &I) { \
+ if (pass == PASS_EMIT_INSTRUCTIONS) \
+ emit##NAME(I); \
+ else \
+ regAllocate##NAME(I); \
+ }
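+    /* For example, DECL_VISIT_FN(LoadInst, LoadInst) below declares
+     * regAllocateLoadInst / emitLoadInst and defines a visitLoadInst that
+     * dispatches to one or the other depending on the current pass.
+     */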
+ DECL_VISIT_FN(BinaryOperator, Instruction);
+ DECL_VISIT_FN(CastInst, CastInst);
+ DECL_VISIT_FN(ReturnInst, ReturnInst);
+ DECL_VISIT_FN(LoadInst, LoadInst);
+ DECL_VISIT_FN(StoreInst, StoreInst);
+ DECL_VISIT_FN(CallInst, CallInst);
+ DECL_VISIT_FN(ICmpInst, ICmpInst);
+ DECL_VISIT_FN(FCmpInst, FCmpInst);
+ DECL_VISIT_FN(InsertElement, InsertElementInst);
+ DECL_VISIT_FN(ExtractElement, ExtractElementInst);
+ DECL_VISIT_FN(ShuffleVectorInst, ShuffleVectorInst);
+ DECL_VISIT_FN(SelectInst, SelectInst);
+ DECL_VISIT_FN(BranchInst, BranchInst);
+ DECL_VISIT_FN(PHINode, PHINode);
+ DECL_VISIT_FN(AllocaInst, AllocaInst);
+#undef DECL_VISIT_FN
+
+ // Emit unary instructions from gen native function
+ void emitUnaryCallInst(CallInst &I, CallSite &CS, ir::Opcode opcode);
+
+ // These instructions are not supported at all
+ void visitVAArgInst(VAArgInst &I) {NOT_SUPPORTED;}
+ void visitSwitchInst(SwitchInst &I) {NOT_SUPPORTED;}
+ void visitInvokeInst(InvokeInst &I) {NOT_SUPPORTED;}
+#if LLVM_VERSION_MINOR == 0
+ void visitUnwindInst(UnwindInst &I) {NOT_SUPPORTED;}
+#endif /* LLVM_VERSION_MINOR == 0 */
+ void visitResumeInst(ResumeInst &I) {NOT_SUPPORTED;}
+ void visitInlineAsm(CallInst &I) {NOT_SUPPORTED;}
+ void visitIndirectBrInst(IndirectBrInst &I) {NOT_SUPPORTED;}
+ void visitUnreachableInst(UnreachableInst &I) {NOT_SUPPORTED;}
+ void visitGetElementPtrInst(GetElementPtrInst &I) {NOT_SUPPORTED;}
+ void visitInsertValueInst(InsertValueInst &I) {NOT_SUPPORTED;}
+ void visitExtractValueInst(ExtractValueInst &I) {NOT_SUPPORTED;}
+ template <bool isLoad, typename T> void visitLoadOrStore(T &I);
+
+ void visitInstruction(Instruction &I) {NOT_SUPPORTED;}
+ };
+
+ char GenWriter::ID = 0;
+
+ bool GenWriter::doInitialization(Module &M) {
+ FunctionPass::doInitialization(M);
+
+ // Initialize
+ TheModule = &M;
+ return false;
+ }
+
+ static Constant *extractConstantElem(Constant *CPV, uint32_t index) {
+ ConstantVector *CV = dyn_cast<ConstantVector>(CPV);
+ GBE_ASSERT(CV != NULL);
+#if GBE_DEBUG
+ const uint32_t elemNum = CV->getNumOperands();
+ GBE_ASSERTM(index < elemNum, "Out-of-bound constant vector access");
+#endif /* GBE_DEBUG */
+ CPV = cast<Constant>(CV->getOperand(index));
+ return CPV;
+ }
+
+ template <typename U, typename T>
+ static U processConstant(Constant *CPV, T doIt, uint32_t index = 0u)
+ {
+#if GBE_DEBUG
+ GBE_ASSERTM(dyn_cast<ConstantExpr>(CPV) == NULL, "Unsupported constant expression");
+ if (isa<UndefValue>(CPV) && CPV->getType()->isSingleValueType())
+ GBE_ASSERTM(false, "Unsupported constant expression");
+#endif /* GBE_DEBUG */
+
+#if LLVM_VERSION_MINOR > 0
+ ConstantDataSequential *seq = dyn_cast<ConstantDataSequential>(CPV);
+
+ if (seq) {
+ Type *Ty = seq->getElementType();
+ if (Ty == Type::getInt1Ty(CPV->getContext())) {
+ const uint64_t u64 = seq->getElementAsInteger(index);
+ return doIt(bool(u64));
+ } else if (Ty == Type::getInt8Ty(CPV->getContext())) {
+ const uint64_t u64 = seq->getElementAsInteger(index);
+ return doIt(uint8_t(u64));
+ } else if (Ty == Type::getInt16Ty(CPV->getContext())) {
+ const uint64_t u64 = seq->getElementAsInteger(index);
+ return doIt(uint16_t(u64));
+ } else if (Ty == Type::getInt32Ty(CPV->getContext())) {
+ const uint64_t u64 = seq->getElementAsInteger(index);
+ return doIt(uint32_t(u64));
+ } else if (Ty == Type::getInt64Ty(CPV->getContext())) {
+ const uint64_t u64 = seq->getElementAsInteger(index);
+ return doIt(u64);
+ } else if (Ty == Type::getFloatTy(CPV->getContext())) {
+ const float f32 = seq->getElementAsFloat(index);
+ return doIt(f32);
+ } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
+ const double f64 = seq->getElementAsDouble(index);
+ return doIt(f64);
+ }
+ } else
+#endif /* LLVM_VERSION_MINOR > 0 */
+
+ if (dyn_cast<ConstantAggregateZero>(CPV)) {
+ return doIt(uint32_t(0)); // XXX Handle type
+ } else {
+ if (dyn_cast<ConstantVector>(CPV))
+ CPV = extractConstantElem(CPV, index);
+ GBE_ASSERTM(dyn_cast<ConstantExpr>(CPV) == NULL, "Unsupported constant expression");
+
+ // Integers
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
+ Type* Ty = CI->getType();
+ if (Ty == Type::getInt1Ty(CPV->getContext())) {
+ const bool b = CI->getZExtValue();
+ return doIt(b);
+ } else if (Ty == Type::getInt8Ty(CPV->getContext())) {
+ const uint8_t u8 = CI->getZExtValue();
+ return doIt(u8);
+ } else if (Ty == Type::getInt16Ty(CPV->getContext())) {
+ const uint16_t u16 = CI->getZExtValue();
+ return doIt(u16);
+ } else if (Ty == Type::getInt32Ty(CPV->getContext())) {
+ const uint32_t u32 = CI->getZExtValue();
+ return doIt(u32);
+ } else if (Ty == Type::getInt64Ty(CPV->getContext())) {
+ const uint64_t u64 = CI->getZExtValue();
+ return doIt(u64);
+ } else {
+ GBE_ASSERTM(false, "Unsupported integer size");
+ return doIt(uint64_t(0));
+ }
+ }
+
+ // Floats and doubles
+ const Type::TypeID typeID = CPV->getType()->getTypeID();
+ switch (typeID) {
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ {
+ ConstantFP *FPC = cast<ConstantFP>(CPV);
+ GBE_ASSERT(isa<UndefValue>(CPV) == false);
+
+ if (FPC->getType() == Type::getFloatTy(CPV->getContext())) {
+ const float f32 = FPC->getValueAPF().convertToFloat();
+ return doIt(f32);
+ } else {
+ const double f64 = FPC->getValueAPF().convertToDouble();
+ return doIt(f64);
+ }
+ }
+ break;
+ default:
+ GBE_ASSERTM(false, "Unsupported constant type");
+ break;
+ }
+ }
+
+ GBE_ASSERTM(false, "Unsupported constant type");
+ return doIt(uint64_t(0));
+ }
+
+ /*! Pfff. I cannot use a lambda here, since it is templated. Congratulations, C++ */
+ struct NewImmediateFunctor
+ {
+ NewImmediateFunctor(ir::Context &ctx) : ctx(ctx) {}
+ template <typename T> ir::ImmediateIndex operator() (const T &t) {
+ return ctx.newImmediate(t);
+ }
+ ir::Context &ctx;
+ };
+
+ ir::ImmediateIndex GenWriter::newImmediate(Constant *CPV, uint32_t index) {
+ return processConstant<ir::ImmediateIndex>(CPV, NewImmediateFunctor(ctx), index);
+ }
+
+ void GenWriter::newRegister(Value *value, Value *key) {
+ auto type = value->getType();
+ auto typeID = type->getTypeID();
+ switch (typeID) {
+ case Type::IntegerTyID:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ case Type::PointerTyID:
+ regTranslator.newScalar(value, key);
+ break;
+ case Type::VectorTyID:
+ {
+ auto vectorType = cast<VectorType>(type);
+ const uint32_t elemNum = vectorType->getNumElements();
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+ regTranslator.newScalar(value, key, elemID);
+ break;
+ }
+ default: NOT_SUPPORTED;
+ };
+ }
+
+ ir::Register GenWriter::getRegister(Value *value, uint32_t elemID) {
+ Constant *CPV = dyn_cast<Constant>(value);
+ if (CPV) {
+ GBE_ASSERT(isa<GlobalValue>(CPV) == false);
+ const ir::ImmediateIndex immIndex = this->newImmediate(CPV, elemID);
+ const ir::Immediate imm = ctx.getImmediate(immIndex);
+ const ir::Register reg = ctx.reg(getFamily(imm.type));
+ ctx.LOADI(imm.type, reg, immIndex);
+ return reg;
+ }
+ else
+ return regTranslator.getScalar(value, elemID);
+ }
+
+ INLINE Value *GenWriter::getPHICopy(Value *PHI) {
+ const uintptr_t ptr = (uintptr_t) PHI;
+ return (Value*) (ptr+1);
+ }
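+ // The pointer trick works because a heap-allocated llvm::Value is aligned
+ // on more than one byte, so ptr+1 can never collide with a real Value*;
+ // it merely serves as a unique key in regTranslator for the shadow copy
+ // register of the PHI (a sketch of the reasoning, not part of the LLVM API).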
+
+ void GenWriter::newLabelIndex(const BasicBlock *bb) {
+ if (labelMap.find(bb) == labelMap.end()) {
+ const ir::LabelIndex label = ctx.label();
+ labelMap[bb] = label;
+ }
+ }
+
+ void GenWriter::simplifyTerminator(BasicBlock *bb) {
+ Value *value = --bb->end();
+ BranchInst *I = NULL;
+ if ((I = dyn_cast<BranchInst>(value)) != NULL) {
+ if (I->isConditional() == false)
+ return;
+ // If the "taken" successor is the next block, we try to invert the
+ // branch.
+ BasicBlock *succ = I->getSuccessor(0);
+ if (llvm::next(Function::iterator(bb)) != Function::iterator(succ))
+ return;
+
+ // More than one use is too complicated: we skip it
+ Value *condition = I->getCondition();
+ if (condition->hasOneUse() == false)
+ return;
+
+ // Right now, we only invert comparison instructions
+ ICmpInst *CI = dyn_cast<ICmpInst>(condition);
+ if (CI != NULL) {
+ GBE_ASSERT(conditionSet.find(CI) == conditionSet.end());
+ conditionSet.insert(CI);
+ return;
+ }
+ }
+ }
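+ // Illustrative sketch (block and register names are made up): for
+ //   %c = icmp slt i32 %a, %b
+ //   br i1 %c, label %next, label %far    ; %next is the fall-through block
+ // the compare is recorded in conditionSet, emitICmpInst then emits GE
+ // instead of LT, and emitBranchInst branches on that inverted predicate
+ // directly to %far, falling through to %next without an extra jump.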
+
+ void GenWriter::emitBasicBlock(BasicBlock *BB) {
+ GBE_ASSERT(labelMap.find(BB) != labelMap.end());
+ ctx.LABEL(labelMap[BB]);
+ for (auto II = BB->begin(), E = BB->end(); II != E; ++II) visit(*II);
+ }
+
+ void GenWriter::emitMovForPHI(BasicBlock *curr, BasicBlock *succ) {
+ for (BasicBlock::iterator I = succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ Value *IV = PN->getIncomingValueForBlock(curr);
+ if (!isa<UndefValue>(IV)) {
+ uint32_t elemNum;
+ Type *llvmType = PN->getType();
+ GBE_ASSERTM(llvmType != Type::getInt1Ty(llvmType->getContext()),
+ "TODO Boolean values cannot escape their definition basic block");
+ const ir::Type type = getVectorInfo(ctx, llvmType, PN, elemNum);
+
+ // Emit the MOVs required by the PHI function. We keep it simple and do
+ // not try to optimize them; a later data flow analysis pass on the Gen IR
+ // will remove them
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+ Value *PHICopy = this->getPHICopy(PN);
+ const ir::Register dst = this->getRegister(PHICopy, elemID);
+ Constant *CP = dyn_cast<Constant>(IV);
+ if (CP) {
+ GBE_ASSERT(isa<GlobalValue>(CP) == false);
+ ConstantVector *CPV = dyn_cast<ConstantVector>(CP);
+ if (CPV && isa<UndefValue>(extractConstantElem(CPV, elemID)))
+ continue;
+ const ir::ImmediateIndex immIndex = this->newImmediate(CP, elemID);
+ const ir::Immediate imm = ctx.getImmediate(immIndex);
+ ctx.LOADI(imm.type, dst, immIndex);
+ } else if (regTranslator.valueExists(IV,elemID) || dyn_cast<Constant>(IV)) {
+ const ir::Register src = this->getRegister(IV, elemID);
+ ctx.MOV(type, dst, src);
+ }
+ }
+ }
+ }
+ }
+
+ void GenWriter::emitFunctionPrototype(Function &F)
+ {
+ GBE_ASSERTM(F.hasStructRetAttr() == false,
+ "Returned value for kernel functions is forbidden");
+ // Loop over the arguments and output registers for them
+ if (!F.arg_empty()) {
+ Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
+ const AttrListPtr &PAL = F.getAttributes();
+
+ // Insert a new register for each function argument
+ uint32_t argID = 1; // Start at one actually
+ for (; I != E; ++I, ++argID) {
+ Type *type = I->getType();
+ GBE_ASSERTM(isScalarType(type) == true,
+ "vector type in the function argument is not supported yet");
+ const ir::Register reg = regTranslator.newScalar(I);
+ if (type->isPointerTy() == false)
+ ctx.input(ir::FunctionArgument::VALUE, reg, getTypeByteSize(unit, type));
+ else {
+ PointerType *pointerType = dyn_cast<PointerType>(type);
+ // By value structure
+#if LLVM_VERSION_MINOR <= 1
+ if (PAL.paramHasAttr(argID, Attribute::ByVal)) {
+#else
+ if (PAL.getParamAttributes(argID).hasAttribute(Attributes::ByVal)) {
+#endif /* LLVM_VERSION_MINOR <= 1 */
+ Type *pointed = pointerType->getElementType();
+ const size_t structSize = getTypeByteSize(unit, pointed);
+ ctx.input(ir::FunctionArgument::STRUCTURE, reg, structSize);
+ }
+ // Regular user provided pointer (global, local or constant)
+ else {
+ const uint32_t addr = pointerType->getAddressSpace();
+ const ir::AddressSpace addrSpace = addressSpaceLLVMToGen(addr);
+ const uint32_t ptrSize = getTypeByteSize(unit, type);
+ switch (addrSpace) {
+ case ir::MEM_GLOBAL:
+ ctx.input(ir::FunctionArgument::GLOBAL_POINTER, reg, ptrSize);
+ break;
+ case ir::MEM_LOCAL:
+ ctx.input(ir::FunctionArgument::LOCAL_POINTER, reg, ptrSize);
+ ctx.getFunction().setUseSLM(true);
+ break;
+ case ir::MEM_CONSTANT:
+ ctx.input(ir::FunctionArgument::CONSTANT_POINTER, reg, ptrSize);
+ break;
+ default: GBE_ASSERT(addrSpace != ir::MEM_PRIVATE);
+ }
+ }
+ }
+ }
+ }
+
+ // A kernel function can neither return a value nor take a variable number
+ // of arguments; check both properties here
+#if GBE_DEBUG
+ const Type *type = F.getReturnType();
+ GBE_ASSERTM(type->isVoidTy() == true,
+ "Returned value for kernel functions is forbidden");
+
+ // Variable number of arguments is not supported
+ FunctionType *FT = cast<FunctionType>(F.getFunctionType());
+ GBE_ASSERT(FT->isVarArg() == false);
+#endif /* GBE_DEBUG */
+ }
+
+ static inline bool isFPIntBitCast(const Instruction &I) {
+ if (!isa<BitCastInst>(I))
+ return false;
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DstTy = I.getType();
+ return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) ||
+ (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy());
+ }
+
+ /*! To track last read and write of the registers */
+ struct RegInfoForMov {
+ ir::Instruction *lastWriteInsn;
+ ir::Instruction *lastReadInsn;
+ uint32_t lastWrite;
+ uint32_t lastRead;
+ };
+
+ /*! Replace register "from" by register "to" in the destination(s) */
+ static void replaceDst(ir::Instruction *insn, ir::Register from, ir::Register to) {
+ const uint32_t dstNum = insn->getDstNum();
+ for (uint32_t dstID = 0; dstID < dstNum; ++dstID)
+ if (insn->getDst(dstID) == from)
+ insn->setDst(dstID, to);
+ }
+
+ /*! Replace register "from" by register "to" in the source(s) */
+ static void replaceSrc(ir::Instruction *insn, ir::Register from, ir::Register to) {
+ const uint32_t srcNum = insn->getSrcNum();
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID)
+ if (insn->getSrc(srcID) == from)
+ insn->setSrc(srcID, to);
+ }
+
+ /*! lastUse maintains data about last uses (reads/writes) for each
+ * ir::Register
+ */
+ static void buildRegInfo(ir::BasicBlock &bb, vector<RegInfoForMov> &lastUse)
+ {
+ // Clear the register usages
+ for (auto &x : lastUse) {
+ x.lastWrite = x.lastRead = 0;
+ x.lastWriteInsn = x.lastReadInsn = NULL;
+ }
+
+ // Find use intervals for all registers (distinguish sources and
+ // destinations)
+ uint32_t insnID = 2;
+ bb.foreach([&](ir::Instruction &insn) {
+ const uint32_t dstNum = insn.getDstNum();
+ const uint32_t srcNum = insn.getSrcNum();
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+ const ir::Register reg = insn.getSrc(srcID);
+ lastUse[reg].lastRead = insnID;
+ lastUse[reg].lastReadInsn = &insn;
+ }
+ for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
+ const ir::Register reg = insn.getDst(dstID);
+ lastUse[reg].lastWrite = insnID+1;
+ lastUse[reg].lastWriteInsn = &insn;
+ }
+ insnID+=2;
+ });
+ }
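+ // Instructions are numbered 2,4,6,...; sources are recorded at insnID and
+ // destinations at insnID+1, so a register written by the very instruction
+ // that reads it is always seen as written strictly after it was read.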
+
+ void GenWriter::removeMOVs(const ir::Liveness &liveness, ir::Function &fn)
+ {
+ // We store the last write and last read for each register
+ const uint32_t regNum = fn.regNum();
+ vector<RegInfoForMov> lastUse;
+ lastUse.resize(regNum);
+
+ // Remove the MOVs per block (local analysis only). Note that we do not
+ // try to remove MOVs for variables that outlive the block, so we use
+ // liveness information to figure out which variables are still alive
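+ // Sketch of the rewrite performed below (register names are illustrative):
+ //   ADD %src, %a, %b              ADD %dst, %a, %b
+ //   ...                     =>    ...   (uses of %src patched to %dst)
+ //   MOV %dst, %src                (MOV removed)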
+ fn.foreachBlock([&](ir::BasicBlock &bb)
+ {
+ // We need to know when each register will be read or written
+ buildRegInfo(bb, lastUse);
+
+ // Liveinfo helps us to know if the source outlives the block
+ const ir::Liveness::BlockInfo &info = liveness.getBlockInfo(&bb);
+
+ auto it = --bb.end();
+ if (it->isMemberOf<ir::BranchInstruction>() == true) --it;
+ for (; it != bb.end();) {
+ ir::Instruction *insn = &*it; it--;
+ const ir::Opcode op = insn->getOpcode();
+ if (op == ir::OP_MOV) {
+ const ir::Register dst = insn->getDst(0);
+ const ir::Register src = insn->getSrc(0);
+ // Outlives the block. We do not do anything
+ if (info.inLiveOut(src))
+ continue;
+ const RegInfoForMov &dstInfo = lastUse[dst];
+ const RegInfoForMov &srcInfo = lastUse[src];
+ // The source is not computed in this block
+ if (srcInfo.lastWrite == 0)
+ continue;
+ // dst is read after src is written. We cannot overwrite dst
+ if (dstInfo.lastRead > srcInfo.lastWrite)
+ continue;
+ // We are good. We first patch the destination then all the sources
+ replaceDst(srcInfo.lastWriteInsn, src, dst);
+ // Then we patch all subsequent uses of the source
+ ir::Instruction *next = static_cast<ir::Instruction*>(srcInfo.lastWriteInsn->next);
+ while (next != insn) {
+ replaceSrc(next, src, dst);
+ next = static_cast<ir::Instruction*>(next->next);
+ }
+ insn->remove();
+ } else if (op == ir::OP_LOADI)
+ continue;
+ else
+ break;
+ }
+ });
+ }
+
+ void GenWriter::removeLOADIs(const ir::Liveness &liveness, ir::Function &fn)
+ {
+ // We store the last write and last read for each register
+ const uint32_t regNum = fn.regNum();
+ vector<RegInfoForMov> lastUse;
+ lastUse.resize(regNum);
+
+ // Traverse all blocks and remove redundant immediates. Do *not* remove
+ // immediates that outlive the block
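+ // Sketch (illustrative registers): with
+ //   LOADI %r0, 16 ... LOADI %r1, 16
+ // the second LOADI is removed and later uses of %r1 in the block are
+ // rewritten to %r0, provided %r1 does not live out of the block.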
+ fn.foreachBlock([&](ir::BasicBlock &bb)
+ {
+ // Each immediate that is already loaded in the block
+ map<ir::Immediate, ir::Register> loadedImm;
+
+ // Immediate to immediate translation
+ map<ir::Register, ir::Register> immTranslate;
+
+ // Liveinfo helps us to know if the loaded immediate outlives the block
+ const ir::Liveness::BlockInfo &info = liveness.getBlockInfo(&bb);
+
+ // We need to know when each register will be read or written
+ buildRegInfo(bb, lastUse);
+
+ // Top bottom traversal -> remove useless LOADIs
+ uint32_t insnID = 2;
+ bb.foreach([&](ir::Instruction &insn)
+ {
+ // We either try to remove the LOADI or we will try to use it as a
+ // replacement for the next same LOADIs
+ if (insn.isMemberOf<ir::LoadImmInstruction>()) {
+ ir::LoadImmInstruction &loadImm = cast<ir::LoadImmInstruction>(insn);
+ const ir::Immediate imm = loadImm.getImmediate();
+ const ir::Register dst = loadImm.getDst(0);
+
+ // Not here: cool, we put it in the map if the register is not
+ // overwritten. If it is, we just ignore it for simplicity. Note that
+ // it should not happen with the way we "unSSA" the code
+ auto it = loadedImm.find(imm);
+ auto end = loadedImm.end();
+ if (it == end && lastUse[dst].lastWrite == insnID+1)
+ loadedImm.insert(std::make_pair(imm, dst));
+ // The same immediate was already loaded and the destination does not
+ // outlive the block. We are good to replace this immediate by the previous one
+ else if (it != end && info.inLiveOut(dst) == false) {
+ immTranslate.insert(std::make_pair(dst, it->second));
+ insn.remove();
+ }
+ }
+ // Traverse all the destinations and sources and perform the
+ // substitutions (if any)
+ else {
+ const uint32_t srcNum = insn.getSrcNum();
+ const uint32_t dstNum = insn.getDstNum();
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+ const ir::Register src = insn.getSrc(srcID);
+ auto it = immTranslate.find(src);
+ if (it != immTranslate.end())
+ insn.setSrc(srcID, it->second);
+ }
+ for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
+ const ir::Register dst = insn.getDst(dstID);
+ auto it = immTranslate.find(dst);
+ if (it != immTranslate.end())
+ insn.setDst(dstID, it->second);
+ }
+ }
+ insnID += 2;
+ });
+ });
+ }
+
+ BVAR(OCL_OPTIMIZE_PHI_MOVES, true);
+ BVAR(OCL_OPTIMIZE_LOADI, true);
+
+ void GenWriter::emitFunction(Function &F)
+ {
+ switch (F.getCallingConv()) {
+ case CallingConv::PTX_Device: // we do not emit device functions
+ return;
+ case CallingConv::PTX_Kernel:
+ break;
+ default: GBE_ASSERTM(false, "Unsupported calling convention");
+ }
+
+ ctx.startFunction(F.getName());
+ this->regTranslator.clear();
+ this->labelMap.clear();
+ this->emitFunctionPrototype(F);
+
+ // Visit all the instructions and emit the IR registers or the value to
+ // value mapping when a new register is not needed
+ pass = PASS_EMIT_REGISTERS;
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I)
+ visit(*I);
+
+ // First create all the labels (one per block) ...
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ this->newLabelIndex(BB);
+
+ // Then, for all branch instructions that have conditions, see if we can
+ // simplify the code by inverting condition code
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ this->simplifyTerminator(BB);
+
+ // ... then, emit the instructions for all basic blocks
+ pass = PASS_EMIT_INSTRUCTIONS;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ emitBasicBlock(BB);
+ ir::Function &fn = ctx.getFunction();
+ ctx.endFunction();
+
+ // Liveness can be shared when we optimized the immediates and the MOVs
+ const ir::Liveness liveness(fn);
+
+ if (OCL_OPTIMIZE_LOADI) this->removeLOADIs(liveness, fn);
+ if (OCL_OPTIMIZE_PHI_MOVES) this->removeMOVs(liveness, fn);
+ }
+
+ void GenWriter::regAllocateReturnInst(ReturnInst &I) {}
+
+ void GenWriter::emitReturnInst(ReturnInst &I) {
+ const ir::Function &fn = ctx.getFunction();
+ GBE_ASSERTM(fn.outputNum() <= 1, "no more than one value can be returned");
+ if (fn.outputNum() == 1 && I.getNumOperands() > 0) {
+ const ir::Register dst = fn.getOutput(0);
+ const ir::Register src = this->getRegister(I.getOperand(0));
+ const ir::RegisterFamily family = fn.getRegisterFamily(dst);
+ ctx.MOV(ir::getType(family), dst, src);
+ }
+ ctx.RET();
+ }
+
+ void GenWriter::regAllocateBinaryOperator(Instruction &I) {
+ this->newRegister(&I);
+ }
+
+ void GenWriter::emitBinaryOperator(Instruction &I) {
+#if GBE_DEBUG
+ GBE_ASSERT(I.getType()->isPointerTy() == false);
+ // We accept logical operations on booleans
+ switch (I.getOpcode()) {
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ break;
+ default:
+ GBE_ASSERT(I.getType() != Type::getInt1Ty(I.getContext()));
+ }
+#endif /* GBE_DEBUG */
+
+ // Get the element type for a vector
+ uint32_t elemNum;
+ const ir::Type type = getVectorInfo(ctx, I.getType(), &I, elemNum);
+
+ // Emit the instructions in a row
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+ const ir::Register dst = this->getRegister(&I, elemID);
+ const ir::Register src0 = this->getRegister(I.getOperand(0), elemID);
+ const ir::Register src1 = this->getRegister(I.getOperand(1), elemID);
+
+ switch (I.getOpcode()) {
+ case Instruction::Add:
+ case Instruction::FAdd: ctx.ADD(type, dst, src0, src1); break;
+ case Instruction::Sub:
+ case Instruction::FSub: ctx.SUB(type, dst, src0, src1); break;
+ case Instruction::Mul:
+ case Instruction::FMul: ctx.MUL(type, dst, src0, src1); break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem: ctx.REM(type, dst, src0, src1); break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv: ctx.DIV(type, dst, src0, src1); break;
+ case Instruction::And: ctx.AND(type, dst, src0, src1); break;
+ case Instruction::Or: ctx.OR(type, dst, src0, src1); break;
+ case Instruction::Xor: ctx.XOR(type, dst, src0, src1); break;
+ case Instruction::Shl: ctx.SHL(type, dst, src0, src1); break;
+ case Instruction::LShr: ctx.SHR(type, dst, src0, src1); break;
+ case Instruction::AShr: ctx.ASR(type, dst, src0, src1); break;
+ default: NOT_SUPPORTED;
+ }
+ }
+ }
+
+ void GenWriter::regAllocateICmpInst(ICmpInst &I) {
+ this->newRegister(&I);
+ }
+
+ static ir::Type makeTypeSigned(const ir::Type &type) {
+ if (type == ir::TYPE_U8) return ir::TYPE_S8;
+ else if (type == ir::TYPE_U16) return ir::TYPE_S16;
+ else if (type == ir::TYPE_U32) return ir::TYPE_S32;
+ else if (type == ir::TYPE_U64) return ir::TYPE_S64;
+ return type;
+ }
+
+ static ir::Type makeTypeUnsigned(const ir::Type &type) {
+ if (type == ir::TYPE_S8) return ir::TYPE_U8;
+ else if (type == ir::TYPE_S16) return ir::TYPE_U16;
+ else if (type == ir::TYPE_S32) return ir::TYPE_U32;
+ else if (type == ir::TYPE_S64) return ir::TYPE_U64;
+ return type;
+ }
+
+ void GenWriter::emitICmpInst(ICmpInst &I) {
+ GBE_ASSERT(I.getOperand(0)->getType() != Type::getInt1Ty(I.getContext()));
+
+ // Get the element type and the number of elements
+ uint32_t elemNum;
+ Type *operandType = I.getOperand(0)->getType();
+ const ir::Type type = getVectorInfo(ctx, operandType, &I, elemNum);
+ const ir::Type signedType = makeTypeSigned(type);
+ const ir::Type unsignedType = makeTypeUnsigned(type);
+
+ // Emit the instructions in a row
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+ const ir::Register dst = this->getRegister(&I, elemID);
+ const ir::Register src0 = this->getRegister(I.getOperand(0), elemID);
+ const ir::Register src1 = this->getRegister(I.getOperand(1), elemID);
+
+ // We must invert the condition to simplify the branch code
+ if (conditionSet.find(&I) != conditionSet.end()) {
+ switch (I.getPredicate()) {
+ case ICmpInst::ICMP_EQ: ctx.NE(type, dst, src0, src1); break;
+ case ICmpInst::ICMP_NE: ctx.EQ(type, dst, src0, src1); break;
+ case ICmpInst::ICMP_ULE: ctx.GT(unsignedType, dst, src0, src1); break;
+ case ICmpInst::ICMP_SLE: ctx.GT(signedType, dst, src0, src1); break;
+ case ICmpInst::ICMP_UGE: ctx.LT(unsignedType, dst, src0, src1); break;
+ case ICmpInst::ICMP_SGE: ctx.LT(signedType, dst, src0, src1); break;
+ case ICmpInst::ICMP_ULT: ctx.GE(unsignedType, dst, src0, src1); break;
+ case ICmpInst::ICMP_SLT: ctx.GE(signedType, dst, src0, src1); break;
+ case ICmpInst::ICMP_UGT: ctx.LE(unsignedType, dst, src0, src1); break;
+ case ICmpInst::ICMP_SGT: ctx.LE(signedType, dst, src0, src1); break;
+ default: NOT_SUPPORTED;
+ }
+ }
+ // Nothing special to do
+ else {
+ switch (I.getPredicate()) {
+ case ICmpInst::ICMP_EQ: ctx.EQ(type, dst, src0, src1); break;
+ case ICmpInst::ICMP_NE: ctx.NE(type, dst, src0, src1); break;
+ case ICmpInst::ICMP_ULE: ctx.LE(unsignedType, dst, src0, src1); break;
+ case ICmpInst::ICMP_SLE: ctx.LE(signedType, dst, src0, src1); break;
+ case ICmpInst::ICMP_UGE: ctx.GE(unsignedType, dst, src0, src1); break;
+ case ICmpInst::ICMP_SGE: ctx.GE(signedType, dst, src0, src1); break;
+ case ICmpInst::ICMP_ULT: ctx.LT(unsignedType, dst, src0, src1); break;
+ case ICmpInst::ICMP_SLT: ctx.LT(signedType, dst, src0, src1); break;
+ case ICmpInst::ICMP_UGT: ctx.GT(unsignedType, dst, src0, src1); break;
+ case ICmpInst::ICMP_SGT: ctx.GT(signedType, dst, src0, src1); break;
+ default: NOT_SUPPORTED;
+ }
+ }
+ }
+ }
+
+ void GenWriter::regAllocateFCmpInst(FCmpInst &I) {
+ this->newRegister(&I);
+ }
+
+ void GenWriter::emitFCmpInst(FCmpInst &I) {
+
+ // Get the element type and the number of elements
+ uint32_t elemNum;
+ Type *operandType = I.getOperand(0)->getType();
+ const ir::Type type = getVectorInfo(ctx, operandType, &I, elemNum);
+
+ // Emit the instructions in a row
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+ const ir::Register dst = this->getRegister(&I, elemID);
+ const ir::Register src0 = this->getRegister(I.getOperand(0), elemID);
+ const ir::Register src1 = this->getRegister(I.getOperand(1), elemID);
+
+ switch (I.getPredicate()) {
+ case ICmpInst::FCMP_OEQ:
+ case ICmpInst::FCMP_UEQ: ctx.EQ(type, dst, src0, src1); break;
+ case ICmpInst::FCMP_ONE:
+ case ICmpInst::FCMP_UNE: ctx.NE(type, dst, src0, src1); break;
+ case ICmpInst::FCMP_OLE:
+ case ICmpInst::FCMP_ULE: ctx.LE(type, dst, src0, src1); break;
+ case ICmpInst::FCMP_OGE:
+ case ICmpInst::FCMP_UGE: ctx.GE(type, dst, src0, src1); break;
+ case ICmpInst::FCMP_OLT:
+ case ICmpInst::FCMP_ULT: ctx.LT(type, dst, src0, src1); break;
+ case ICmpInst::FCMP_OGT:
+ case ICmpInst::FCMP_UGT: ctx.GT(type, dst, src0, src1); break;
+ default: NOT_SUPPORTED;
+ }
+ }
+ }
+
+ void GenWriter::regAllocateCastInst(CastInst &I) {
+ Value *dstValue = &I;
+ Value *srcValue = I.getOperand(0);
+ const auto op = I.getOpcode();
+
+ switch (op)
+ {
+ // Pointer <-> integer casts: just alias the register when the source is not a constant
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ {
+ Constant *CPV = dyn_cast<Constant>(srcValue);
+ if (CPV == NULL) {
+#if GBE_DEBUG
+ Type *dstType = dstValue->getType();
+ Type *srcType = srcValue->getType();
+ GBE_ASSERT(getTypeByteSize(unit, dstType) == getTypeByteSize(unit, srcType));
+#endif /* GBE_DEBUG */
+ regTranslator.newValueProxy(srcValue, dstValue);
+ } else
+ this->newRegister(dstValue);
+ }
+ break;
+ // Bitcasts just forward the registers
+ case Instruction::BitCast:
+ {
+ uint32_t elemNum;
+ getVectorInfo(ctx, I.getType(), &I, elemNum);
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+ regTranslator.newValueProxy(srcValue, dstValue, elemID, elemID);
+ }
+ break;
+ // Various conversion operations -> just allocate registers for them
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::SExt:
+ case Instruction::ZExt:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc:
+ case Instruction::Trunc:
+ this->newRegister(&I);
+ break;
+ default: NOT_SUPPORTED;
+ }
+ }
+
+ void GenWriter::emitCastInst(CastInst &I) {
+ switch (I.getOpcode())
+ {
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ {
+ Value *dstValue = &I;
+ Value *srcValue = I.getOperand(0);
+ Constant *CPV = dyn_cast<Constant>(srcValue);
+ if (CPV != NULL) {
+ const ir::ImmediateIndex index = ctx.newImmediate(CPV);
+ const ir::Immediate imm = ctx.getImmediate(index);
+ const ir::Register reg = this->getRegister(dstValue);
+ ctx.LOADI(imm.type, reg, index);
+ }
+ }
+ break;
+ case Instruction::BitCast: break; // nothing to emit here
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::SExt:
+ case Instruction::ZExt:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc:
+ case Instruction::Trunc:
+ {
+ // Get the element type for a vector
+ uint32_t elemNum;
+ Type *llvmDstType = I.getType();
+ Type *llvmSrcType = I.getOperand(0)->getType();
+ const ir::Type dstType = getVectorInfo(ctx, llvmDstType, &I, elemNum);
+ const ir::Type srcType = getVectorInfo(ctx, llvmSrcType, &I, elemNum);
+
+ // We use a select (0,1), not a convert, when the source is a boolean
+ if (srcType == ir::TYPE_BOOL) {
+ const ir::RegisterFamily family = getFamily(dstType);
+ const ir::ImmediateIndex zero = ctx.newIntegerImmediate(0, dstType);
+ const ir::ImmediateIndex one = ctx.newIntegerImmediate(1, dstType);
+ const ir::Register zeroReg = ctx.reg(family);
+ const ir::Register oneReg = ctx.reg(family);
+ ctx.LOADI(dstType, zeroReg, zero);
+ ctx.LOADI(dstType, oneReg, one);
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+ const ir::Register dst = this->getRegister(&I, elemID);
+ const ir::Register src = this->getRegister(I.getOperand(0), elemID);
+ ctx.SEL(dstType, dst, src, oneReg, zeroReg);
+ }
+ }
+ // Use a convert for the other cases
+ else {
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+ const ir::Register dst = this->getRegister(&I, elemID);
+ const ir::Register src = this->getRegister(I.getOperand(0), elemID);
+ ctx.CVT(dstType, srcType, dst, src);
+ }
+ }
+ }
+ break;
+ default: NOT_SUPPORTED;
+ }
+ }
+
+ /*! Once again, it is a templated functor. No lambda */
+ struct InsertExtractFunctor {
+ InsertExtractFunctor(ir::Context &ctx) : ctx(ctx) {}
+ template <typename T> ir::Immediate operator() (const T &t) {
+ return ir::Immediate(t);
+ }
+ ir::Context &ctx;
+ };
+
+ void GenWriter::regAllocateInsertElement(InsertElementInst &I) {
+ Value *modified = I.getOperand(0);
+ Value *toInsert = I.getOperand(1);
+ Value *index = I.getOperand(2);
+
+ // Get the index for the insertion
+ Constant *CPV = dyn_cast<Constant>(index);
+ GBE_ASSERTM(CPV != NULL, "only constant indices when inserting values");
+ auto x = processConstant<ir::Immediate>(CPV, InsertExtractFunctor(ctx));
+ GBE_ASSERTM(x.type == ir::TYPE_U32 || x.type == ir::TYPE_S32,
+ "Invalid index type for InsertElement");
+
+ // Crash on overrun
+ VectorType *vectorType = cast<VectorType>(modified->getType());
+ const uint32_t elemNum = vectorType->getNumElements();
+ const uint32_t modifiedID = x.data.u32;
+ GBE_ASSERTM(modifiedID < elemNum, "Out-of-bound index for InsertElement");
+
+ // The source vector is not constant
+ if (!isa<Constant>(modified) || isa<UndefValue>(modified)) {
+ // Non modified values are just proxies
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+ if (elemID != modifiedID)
+ regTranslator.newValueProxy(modified, &I, elemID, elemID);
+ }
+ // The source vector is constant
+ else {
+ // Non modified values will use LOADI
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+ if (elemID != modifiedID) {
+ const ir::Type type = getType(ctx, toInsert->getType());
+ const ir::Register reg = ctx.reg(getFamily(type));
+ regTranslator.insertRegister(reg, &I, elemID);
+ }
+ }
+
+ // If the element to insert is an immediate we will generate a LOADI.
+ // Otherwise, the value is just a proxy of the inserted value
+ if (dyn_cast<Constant>(toInsert) != NULL) {
+ const ir::Type type = getType(ctx, toInsert->getType());
+ const ir::Register reg = ctx.reg(getFamily(type));
+ regTranslator.insertRegister(reg, &I, modifiedID);
+ } else
+ regTranslator.newValueProxy(toInsert, &I, 0, modifiedID);
+ }
+
+ void GenWriter::emitInsertElement(InsertElementInst &I) {
+ // Note that we check everything in regAllocateInsertElement
+ Value *modified = I.getOperand(0);
+ Value *toInsert = I.getOperand(1);
+ Value *index = I.getOperand(2);
+
+ // Get the index of the value to insert
+ Constant *indexCPV = dyn_cast<Constant>(index);
+ auto x = processConstant<ir::Immediate>(indexCPV, InsertExtractFunctor(ctx));
+ const uint32_t modifiedID = x.data.u32;
+
+ // The source vector is constant. We need to insert LOADI for the unmodified
+ // values
+ if (isa<Constant>(modified) && !isa<UndefValue>(modified)) {
+ VectorType *vectorType = cast<VectorType>(modified->getType());
+ const uint32_t elemNum = vectorType->getNumElements();
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+ if (elemID != modifiedID) {
+ Constant *sourceCPV = dyn_cast<Constant>(modified);
+ if (isa<UndefValue>(extractConstantElem(sourceCPV, elemID)) == false) {
+ const ir::ImmediateIndex immIndex = this->newImmediate(sourceCPV, elemID);
+ const ir::Immediate imm = ctx.getImmediate(immIndex);
+ const ir::Register reg = regTranslator.getScalar(&I, elemID);
+ ctx.LOADI(imm.type, reg, immIndex);
+ }
+ }
+ }
+
+ // If the inserted value is not a constant, we just use a proxy
+ if (dyn_cast<Constant>(toInsert) == NULL)
+ return;
+
+ // We need a LOADI if we insert an immediate
+ Constant *toInsertCPV = dyn_cast<Constant>(toInsert);
+ const ir::ImmediateIndex immIndex = this->newImmediate(toInsertCPV);
+ const ir::Immediate imm = ctx.getImmediate(immIndex);
+ const ir::Register reg = regTranslator.getScalar(&I, modifiedID);
+ ctx.LOADI(imm.type, reg, immIndex);
+ }
+
+ void GenWriter::regAllocateExtractElement(ExtractElementInst &I) {
+ Value *extracted = I.getOperand(0);
+ Value *index = I.getOperand(1);
+ GBE_ASSERTM(isa<Constant>(extracted) == false,
+ "TODO support constant vector for extract");
+ Constant *CPV = dyn_cast<Constant>(index);
+ GBE_ASSERTM(CPV != NULL, "only constant indices when extracting values");
+ auto x = processConstant<ir::Immediate>(CPV, InsertExtractFunctor(ctx));
+ GBE_ASSERTM(x.type == ir::TYPE_U32 || x.type == ir::TYPE_S32,
+ "Invalid index type for ExtractElement");
+
+ // Crash on overrun
+ const uint32_t extractedID = x.data.u32;
+#if GBE_DEBUG
+ VectorType *vectorType = cast<VectorType>(extracted->getType());
+ const uint32_t elemNum = vectorType->getNumElements();
+ GBE_ASSERTM(extractedID < elemNum, "Out-of-bound index for ExtractElement");
+#endif /* GBE_DEBUG */
+
+ // Easy when the vector is not an immediate
+ regTranslator.newValueProxy(extracted, &I, extractedID, 0);
+ }
+
+ void GenWriter::emitExtractElement(ExtractElementInst &I) {
+ // TODO -> insert LOADI when the extracted vector is constant
+ }
+
+ void GenWriter::regAllocateShuffleVectorInst(ShuffleVectorInst &I) {
+ Value *first = I.getOperand(0);
+ Value *second = I.getOperand(1);
+ GBE_ASSERTM(!isa<Constant>(first) || isa<UndefValue>(first),
+ "TODO support constant vector for shuffle");
+ GBE_ASSERTM(!isa<Constant>(second) || isa<UndefValue>(second),
+ "TODO support constant vector for shuffle");
+ VectorType *dstType = cast<VectorType>(I.getType());
+ VectorType *srcType = cast<VectorType>(first->getType());
+ const uint32_t dstElemNum = dstType->getNumElements();
+ const uint32_t srcElemNum = srcType->getNumElements();
+ for (uint32_t elemID = 0; elemID < dstElemNum; ++elemID) {
+ uint32_t srcID = I.getMaskValue(elemID);
+ Value *src = first;
+ if (srcID >= srcElemNum) {
+ srcID -= srcElemNum;
+ src = second;
+ }
+ regTranslator.newValueProxy(src, &I, srcID, elemID);
+ }
+ }
+
+ void GenWriter::emitShuffleVectorInst(ShuffleVectorInst &I) {}
+
+ void GenWriter::regAllocateSelectInst(SelectInst &I) {
+ this->newRegister(&I);
+ }
+
+ void GenWriter::emitSelectInst(SelectInst &I) {
+ // Get the element type for a vector
+ uint32_t elemNum;
+ const ir::Type type = getVectorInfo(ctx, I.getType(), &I, elemNum);
+
+ // Condition can be either a vector or a scalar
+ Type *condType = I.getOperand(0)->getType();
+ const bool isVectorCond = isa<VectorType>(condType);
+
+ // Emit the instructions in a row
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+ const ir::Register dst = this->getRegister(&I, elemID);
+ const uint32_t condID = isVectorCond ? elemID : 0;
+ const ir::Register cond = this->getRegister(I.getOperand(0), condID);
+ const ir::Register src0 = this->getRegister(I.getOperand(1), elemID);
+ const ir::Register src1 = this->getRegister(I.getOperand(2), elemID);
+ ctx.SEL(type, dst, cond, src0, src1);
+ }
+ }
+
+ void GenWriter::regAllocatePHINode(PHINode &I) {
+ // Copy 1 for the PHI
+ this->newRegister(&I);
+ // Copy 2 to avoid lost copy issue
+ Value *copy = this->getPHICopy(&I);
+ this->newRegister(&I, copy);
+ }
+
+ void GenWriter::emitPHINode(PHINode &I) {
+ Value *copy = this->getPHICopy(&I);
+ uint32_t elemNum;
+ const ir::Type type = getVectorInfo(ctx, I.getType(), &I, elemNum);
+
+ // Emit the MOVs to avoid the lost copy issue
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+ const ir::Register dst = this->getRegister(&I, elemID);
+ const ir::Register src = this->getRegister(copy, elemID);
+ ctx.MOV(type, dst, src);
+ }
+ }
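+ // Together with emitMovForPHI, this is the usual two-register scheme for
+ // the "lost copy" problem when leaving SSA form: every predecessor writes
+ // the copy register, and the PHI block then moves copy -> PHI register, so
+ // a value flowing along a back edge cannot be clobbered while still needed.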
+
+ void GenWriter::regAllocateBranchInst(BranchInst &I) {}
+
+ void GenWriter::emitBranchInst(BranchInst &I) {
+ // Emit MOVs if required
+ BasicBlock *bb = I.getParent();
+ this->emitMovForPHI(bb, I.getSuccessor(0));
+ if (I.isConditional())
+ this->emitMovForPHI(bb, I.getSuccessor(1));
+
+ // Unconditional branch. Emit a jump only when the target is not the
+ // fall-through (next) block
+ if (I.isConditional() == false) {
+ BasicBlock *target = I.getSuccessor(0);
+ if (llvm::next(Function::iterator(bb)) != Function::iterator(target)) {
+ GBE_ASSERT(labelMap.find(target) != labelMap.end());
+ const ir::LabelIndex labelIndex = labelMap[target];
+ ctx.BRA(labelIndex);
+ }
+ }
+ // The LLVM branch has two targets
+ else {
+ BasicBlock *taken = NULL, *nonTaken = NULL;
+ Value *condition = I.getCondition();
+
+ // We may have inverted the branch condition to simplify the branching code
+ const bool inverted = conditionSet.find(condition) != conditionSet.end();
+ taken = inverted ? I.getSuccessor(1) : I.getSuccessor(0);
+ nonTaken = inverted ? I.getSuccessor(0) : I.getSuccessor(1);
+
+ // Get both taken label and predicate register
+ GBE_ASSERT(labelMap.find(taken) != labelMap.end());
+ const ir::LabelIndex index = labelMap[taken];
+ const ir::Register reg = this->getRegister(condition);
+ ctx.BRA(index, reg);
+
+ // If non-taken target is the next block, there is nothing to do
+ BasicBlock *bb = I.getParent();
+ if (llvm::next(Function::iterator(bb)) == Function::iterator(nonTaken))
+ return;
+
+ // This is slightly more complicated here. We need to issue one more
+ // branch for the non-taken condition.
+ GBE_ASSERT(labelMap.find(nonTaken) != labelMap.end());
+ const ir::LabelIndex untakenIndex = ctx.label();
+ ctx.LABEL(untakenIndex);
+ ctx.BRA(labelMap[nonTaken]);
+ }
+ }
+
+ void GenWriter::regAllocateCallInst(CallInst &I) {
+ Value *dst = &I;
+ Value *Callee = I.getCalledValue();
+ GBE_ASSERT(ctx.getFunction().getProfile() == ir::PROFILE_OCL);
+ GBE_ASSERT(isa<InlineAsm>(I.getCalledValue()) == false);
+ GBE_ASSERT(I.hasStructRetAttr() == false);
+
+ // We only support a small number of intrinsics right now
+ if (Function *F = I.getCalledFunction()) {
+ const Intrinsic::ID intrinsicID = (Intrinsic::ID) F->getIntrinsicID();
+ if (intrinsicID != 0) {
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::stacksave:
+ this->newRegister(&I);
+ break;
+ case Intrinsic::stackrestore:
+ break;
+#if LLVM_VERSION_MINOR == 2
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ break;
+ case Intrinsic::fmuladd:
+ this->newRegister(&I);
+ break;
+#endif /* LLVM_VERSION_MINOR == 2 */
+ default:
+ GBE_ASSERTM(false, "Unsupported intrinsics");
+ }
+ return;
+ }
+ }
+
+ // Get the name of the called function and handle it
+ const std::string fnName = Callee->getName();
+ auto it = instrinsicMap.map.find(fnName);
+ GBE_ASSERT(it != instrinsicMap.map.end());
+ switch (it->second) {
+ case GEN_OCL_GET_GROUP_ID0:
+ regTranslator.newScalarProxy(ir::ocl::groupid0, dst); break;
+ case GEN_OCL_GET_GROUP_ID1:
+ regTranslator.newScalarProxy(ir::ocl::groupid1, dst); break;
+ case GEN_OCL_GET_GROUP_ID2:
+ regTranslator.newScalarProxy(ir::ocl::groupid2, dst); break;
+ case GEN_OCL_GET_LOCAL_ID0:
+ regTranslator.newScalarProxy(ir::ocl::lid0, dst); break;
+ case GEN_OCL_GET_LOCAL_ID1:
+ regTranslator.newScalarProxy(ir::ocl::lid1, dst); break;
+ case GEN_OCL_GET_LOCAL_ID2:
+ regTranslator.newScalarProxy(ir::ocl::lid2, dst); break;
+ case GEN_OCL_GET_NUM_GROUPS0:
+ regTranslator.newScalarProxy(ir::ocl::numgroup0, dst); break;
+ case GEN_OCL_GET_NUM_GROUPS1:
+ regTranslator.newScalarProxy(ir::ocl::numgroup1, dst); break;
+ case GEN_OCL_GET_NUM_GROUPS2:
+ regTranslator.newScalarProxy(ir::ocl::numgroup2, dst); break;
+ case GEN_OCL_GET_LOCAL_SIZE0:
+ regTranslator.newScalarProxy(ir::ocl::lsize0, dst); break;
+ case GEN_OCL_GET_LOCAL_SIZE1:
+ regTranslator.newScalarProxy(ir::ocl::lsize1, dst); break;
+ case GEN_OCL_GET_LOCAL_SIZE2:
+ regTranslator.newScalarProxy(ir::ocl::lsize2, dst); break;
+ case GEN_OCL_GET_GLOBAL_SIZE0:
+ regTranslator.newScalarProxy(ir::ocl::gsize0, dst); break;
+ case GEN_OCL_GET_GLOBAL_SIZE1:
+ regTranslator.newScalarProxy(ir::ocl::gsize1, dst); break;
+ case GEN_OCL_GET_GLOBAL_SIZE2:
+ regTranslator.newScalarProxy(ir::ocl::gsize2, dst); break;
+ case GEN_OCL_GET_GLOBAL_OFFSET0:
+ regTranslator.newScalarProxy(ir::ocl::goffset0, dst); break;
+ case GEN_OCL_GET_GLOBAL_OFFSET1:
+ regTranslator.newScalarProxy(ir::ocl::goffset1, dst); break;
+ case GEN_OCL_GET_GLOBAL_OFFSET2:
+ regTranslator.newScalarProxy(ir::ocl::goffset2, dst); break;
+ case GEN_OCL_COS:
+ case GEN_OCL_SIN:
+ case GEN_OCL_SQR:
+ case GEN_OCL_RSQ:
+ case GEN_OCL_LOG:
+ case GEN_OCL_POW:
+ case GEN_OCL_RCP:
+ case GEN_OCL_ABS:
+ case GEN_OCL_RNDZ:
+ case GEN_OCL_RNDE:
+ case GEN_OCL_RNDU:
+ case GEN_OCL_RNDD:
+ // No structure can be returned
+ this->newRegister(&I);
+ break;
+ case GEN_OCL_FORCE_SIMD8:
+ case GEN_OCL_FORCE_SIMD16:
+ case GEN_OCL_LBARRIER:
+ case GEN_OCL_GBARRIER:
+ case GEN_OCL_LGBARRIER:
+ break;
+ default:
+ GBE_ASSERTM(false, "Function call are not supported yet");
+ };
+ }
+
+ struct U64CPVExtractFunctor {
+ U64CPVExtractFunctor(ir::Context &ctx) : ctx(ctx) {}
+ template <typename T> INLINE uint64_t operator() (const T &t) {
+ return uint64_t(t);
+ }
+ ir::Context &ctx;
+ };
+
+ void GenWriter::emitUnaryCallInst(CallInst &I, CallSite &CS, ir::Opcode opcode) {
+ CallSite::arg_iterator AI = CS.arg_begin();
+#if GBE_DEBUG
+ CallSite::arg_iterator AE = CS.arg_end();
+#endif /* GBE_DEBUG */
+ GBE_ASSERT(AI != AE);
+ const ir::Register src = this->getRegister(*AI);
+ const ir::Register dst = this->getRegister(&I);
+ ctx.ALU1(opcode, ir::TYPE_FLOAT, dst, src);
+ }
+
+ void GenWriter::emitCallInst(CallInst &I) {
+ if (Function *F = I.getCalledFunction()) {
+ if (F->getIntrinsicID() != 0) {
+ const ir::Function &fn = ctx.getFunction();
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::stacksave:
+ {
+ const ir::Register dst = this->getRegister(&I);
+ const ir::Register src = ir::ocl::stackptr;
+ const ir::RegisterFamily family = fn.getRegisterFamily(dst);
+ ctx.MOV(ir::getType(family), dst, src);
+ }
+ break;
+ case Intrinsic::stackrestore:
+ {
+ const ir::Register dst = ir::ocl::stackptr;
+ const ir::Register src = this->getRegister(I.getOperand(0));
+ const ir::RegisterFamily family = fn.getRegisterFamily(dst);
+ ctx.MOV(ir::getType(family), dst, src);
+ }
+ break;
+#if LLVM_VERSION_MINOR == 2
+ case Intrinsic::fmuladd:
+ {
+ const ir::Register tmp = ctx.reg(ir::FAMILY_DWORD);
+ const ir::Register dst = this->getRegister(&I);
+ const ir::Register src0 = this->getRegister(I.getOperand(0));
+ const ir::Register src1 = this->getRegister(I.getOperand(1));
+ const ir::Register src2 = this->getRegister(I.getOperand(2));
+ ctx.MUL(ir::TYPE_FLOAT, tmp, src0, src1);
+ ctx.ADD(ir::TYPE_FLOAT, dst, tmp, src2);
+ }
+ break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ break;
+#endif /* LLVM_VERSION_MINOR == 2 */
+ default: NOT_IMPLEMENTED;
+ }
+ } else {
+ // Get the name of the called function and handle it
+ Value *Callee = I.getCalledValue();
+ const std::string fnName = Callee->getName();
+ auto it = instrinsicMap.map.find(fnName);
+ GBE_ASSERT(it != instrinsicMap.map.end());
+
+ // Get the function arguments
+ CallSite CS(&I);
+ CallSite::arg_iterator AI = CS.arg_begin();
+#if GBE_DEBUG
+ CallSite::arg_iterator AE = CS.arg_end();
+#endif /* GBE_DEBUG */
+
+ switch (it->second) {
+ case GEN_OCL_POW:
+ {
+ const ir::Register src0 = this->getRegister(*AI); ++AI;
+ const ir::Register src1 = this->getRegister(*AI);
+ const ir::Register dst = this->getRegister(&I);
+ ctx.POW(ir::TYPE_FLOAT, dst, src0, src1);
+ break;
+ }
+ case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
+ case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break;
+ case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break;
+ case GEN_OCL_SQR: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break;
+ case GEN_OCL_RSQ: this->emitUnaryCallInst(I,CS,ir::OP_RSQ); break;
+ case GEN_OCL_RCP: this->emitUnaryCallInst(I,CS,ir::OP_RCP); break;
+ case GEN_OCL_ABS: this->emitUnaryCallInst(I,CS,ir::OP_ABS); break;
+ case GEN_OCL_RNDZ: this->emitUnaryCallInst(I,CS,ir::OP_RNDZ); break;
+ case GEN_OCL_RNDE: this->emitUnaryCallInst(I,CS,ir::OP_RNDE); break;
+ case GEN_OCL_RNDU: this->emitUnaryCallInst(I,CS,ir::OP_RNDU); break;
+ case GEN_OCL_RNDD: this->emitUnaryCallInst(I,CS,ir::OP_RNDD); break;
+ case GEN_OCL_FORCE_SIMD8: ctx.setSimdWidth(8); break;
+ case GEN_OCL_FORCE_SIMD16: ctx.setSimdWidth(16); break;
+ case GEN_OCL_LBARRIER: ctx.SYNC(ir::syncLocalBarrier); break;
+ case GEN_OCL_GBARRIER: ctx.SYNC(ir::syncGlobalBarrier); break;
+ case GEN_OCL_LGBARRIER: ctx.SYNC(ir::syncLocalBarrier | ir::syncGlobalBarrier); break;
+ default: break;
+ }
+ }
+ }
+ }
+
+ void GenWriter::regAllocateAllocaInst(AllocaInst &I) {
+ this->newRegister(&I);
+ }
+ void GenWriter::emitAllocaInst(AllocaInst &I) {
+ Value *src = I.getOperand(0);
+ Type *elemType = I.getType()->getElementType();
+ ir::ImmediateIndex immIndex;
+ bool needMultiply = true;
+
+ // Be aware, we manipulate pointers
+ if (ctx.getPointerSize() == ir::POINTER_32_BITS)
+ immIndex = ctx.newImmediate(uint32_t(getTypeByteSize(unit, elemType)));
+ else
+ immIndex = ctx.newImmediate(uint64_t(getTypeByteSize(unit, elemType)));
+
+ // See whether we know at compile time the size we need to allocate
+ if (I.isArrayAllocation() == false) // one element allocated only
+ needMultiply = false;
+ else {
+ Constant *CPV = dyn_cast<Constant>(src);
+ if (CPV) {
+ const uint64_t elemNum = processConstant<uint64_t>(CPV, U64CPVExtractFunctor(ctx));
+ ir::Immediate imm = ctx.getImmediate(immIndex);
+ imm.data.u64 = ALIGN(imm.data.u64 * elemNum, 4);
+ ctx.setImmediate(immIndex, imm);
+ needMultiply = false;
+ } else {
+ // Brutal but cheap way to get arrays aligned on 4 bytes: we just align
+ // the element on 4 bytes!
+ ir::Immediate imm = ctx.getImmediate(immIndex);
+ imm.data.u64 = ALIGN(imm.data.u64, 4);
+ ctx.setImmediate(immIndex, imm);
+ }
+ }
+
+ // Now emit the stream of instructions to get the allocated pointer
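+ // The emitted sequence looks like this (sketch; imm.type is the pointer
+ // type and [MUL] only appears for variable-length arrays):
+ //   MOV   dst, stackptr
+ //   LOADI tmp, allocation_size
+ //   [MUL  tmp, elemCount, tmp]
+ //   ADD   stackptr, stackptr, tmp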
+ const ir::RegisterFamily pointerFamily = ctx.getPointerFamily();
+ const ir::Register dst = this->getRegister(&I);
+ const ir::Register stack = ir::ocl::stackptr;
+ const ir::Register reg = ctx.reg(pointerFamily);
+ const ir::Immediate imm = ctx.getImmediate(immIndex);
+
+ // Set the destination register properly
+ ctx.MOV(imm.type, dst, stack);
+
+ // Easy case, we just increment the stack pointer
+ if (needMultiply == false) {
+ ctx.LOADI(imm.type, reg, immIndex);
+ ctx.ADD(imm.type, stack, stack, reg);
+ }
+ // Harder case (variable length array) that requires a multiply
+ else {
+ ctx.LOADI(imm.type, reg, immIndex);
+ ctx.MUL(imm.type, reg, this->getRegister(src), reg);
+ ctx.ADD(imm.type, stack, stack, reg);
+ }
+ }
+
+ static INLINE Value *getLoadOrStoreValue(LoadInst &I) {
+ return &I;
+ }
+ static INLINE Value *getLoadOrStoreValue(StoreInst &I) {
+ return I.getValueOperand();
+ }
+ void GenWriter::regAllocateLoadInst(LoadInst &I) {
+ this->newRegister(&I);
+ }
+ void GenWriter::regAllocateStoreInst(StoreInst &I) {}
+
+ template <bool isLoad, typename T>
+ INLINE void GenWriter::emitLoadOrStore(T &I)
+ {
+ GBE_ASSERTM(I.isVolatile() == false, "Volatile pointer is not supported");
+ unsigned int llvmSpace = I.getPointerAddressSpace();
+ Value *llvmPtr = I.getPointerOperand();
+ Value *llvmValues = getLoadOrStoreValue(I);
+ Type *llvmType = llvmValues->getType();
+ const bool dwAligned = (I.getAlignment() % 4) == 0;
+ const ir::AddressSpace addrSpace = addressSpaceLLVMToGen(llvmSpace);
+ const ir::Register ptr = this->getRegister(llvmPtr);
+
+ // Scalars are easy. We need not build register tuples
+ if (isScalarType(llvmType) == true) {
+ const ir::Type type = getType(ctx, llvmType);
+ const ir::Register values = this->getRegister(llvmValues);
+ if (isLoad)
+ ctx.LOAD(type, ptr, addrSpace, dwAligned, values);
+ else
+ ctx.STORE(type, ptr, addrSpace, dwAligned, values);
+ }
+ // A vector type requires building a tuple
+ else {
+ VectorType *vectorType = cast<VectorType>(llvmType);
+ Type *elemType = vectorType->getElementType();
+
+ // We follow OCL spec and support 2,3,4,8,16 elements only
+ const uint32_t elemNum = vectorType->getNumElements();
+ GBE_ASSERTM(elemNum == 2 || elemNum == 3 || elemNum == 4 || elemNum == 8 || elemNum == 16,
+ "Only vectors of 2,3,4,8 or 16 elements are supported");
+
+ // The code is going to be fairly different from type to type (based on
+ // the size of each vector element)
+ const ir::Type type = getType(ctx, elemType);
+ const ir::RegisterFamily pointerFamily = ctx.getPointerFamily();
+
+ if (type == ir::TYPE_FLOAT || type == ir::TYPE_U32 || type == ir::TYPE_S32) {
+ // One message is enough here. Nothing special to do
+ if (elemNum <= 4) {
+ // Build the tuple data in the vector
+ vector<ir::Register> tupleData; // put registers here
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+ const ir::Register reg = this->getRegister(llvmValues, elemID);
+ tupleData.push_back(reg);
+ }
+ const ir::Tuple tuple = ctx.arrayTuple(&tupleData[0], elemNum);
+
+ // Emit the instruction
+ if (isLoad)
+ ctx.LOAD(type, tuple, ptr, addrSpace, elemNum, dwAligned);
+ else
+ ctx.STORE(type, tuple, ptr, addrSpace, elemNum, dwAligned);
+ }
+ // Not supported by the hardware. So, we split the message and we use
+ // strided loads and stores
+ else {
+ // We simply use several uint4 loads
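+ // For instance (sketch), a float8 load is split into two float4 messages:
+ //   LOAD {v0..v3}, ptr
+ //   ADD  tmp, ptr, 16        ; 4 * sizeof(float)
+ //   LOAD {v4..v7}, tmp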
+ const uint32_t msgNum = elemNum / 4;
+ for (uint32_t msg = 0; msg < msgNum; ++msg) {
+ // Build the tuple data in the vector
+ vector<ir::Register> tupleData; // put registers here
+ for (uint32_t elemID = 0; elemID < 4; ++elemID) {
+ const ir::Register reg = this->getRegister(llvmValues, 4*msg+elemID);
+ tupleData.push_back(reg);
+ }
+ const ir::Tuple tuple = ctx.arrayTuple(&tupleData[0], 4);
+
+ // We may need to offset the pointer
+ ir::Register addr;
+ if (msg == 0)
+ addr = ptr;
+ else {
+ const ir::Register offset = ctx.reg(pointerFamily);
+ ir::ImmediateIndex immIndex;
+ ir::Type immType;
+ if (pointerFamily == ir::FAMILY_DWORD) {
+ immIndex = ctx.newImmediate(int32_t(msg*sizeof(uint32_t[4])));
+ immType = ir::TYPE_S32;
+ } else {
+ immIndex = ctx.newImmediate(int64_t(msg*sizeof(uint64_t[4])));
+ immType = ir::TYPE_S64;
+ }
+
+ addr = ctx.reg(pointerFamily);
+ ctx.LOADI(immType, offset, immIndex);
+ ctx.ADD(immType, addr, ptr, offset);
+ }
+
+ // Emit the instruction
+ if (isLoad)
+ ctx.LOAD(type, tuple, addr, addrSpace, 4, true);
+ else
+ ctx.STORE(type, tuple, addr, addrSpace, 4, true);
+ }
+ }
+ } else
+ GBE_ASSERTM(false, "loads / stores of vectors of short / chars is not supported yet");
+ }
+ }
+
+ void GenWriter::emitLoadInst(LoadInst &I) {
+ this->emitLoadOrStore<true>(I);
+ }
+
+ void GenWriter::emitStoreInst(StoreInst &I) {
+ this->emitLoadOrStore<false>(I);
+ }
+
+ llvm::FunctionPass *createGenPass(ir::Unit &unit) {
+ return new GenWriter(unit);
+ }
+} /* namespace gbe */
+
diff --git a/backend/src/llvm/llvm_gen_backend.hpp b/backend/src/llvm/llvm_gen_backend.hpp
new file mode 100644
index 0000000..c270924
--- /dev/null
+++ b/backend/src/llvm/llvm_gen_backend.hpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file llvm_gen_backend.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ *
+ * Pass generation functions
+ */
+#ifndef __GBE_LLVM_GEN_BACKEND_HPP__
+#define __GBE_LLVM_GEN_BACKEND_HPP__
+
+#include "llvm/Pass.h"
+#include "sys/platform.hpp"
+
+// LLVM Type
+namespace llvm { class Type; }
+
+namespace gbe
+{
+ // Final target of the Gen backend
+ namespace ir { class Unit; }
+
+ /*! Number of padding bytes needed to align the offset */
+ uint32_t getPadding(uint32_t offset, uint32_t align);
+
+ /*! Get the type alignment in bytes */
+ uint32_t getAlignmentByte(const ir::Unit &unit, llvm::Type* Ty);
+
+ /*! Get the type size in bits */
+ uint32_t getTypeBitSize(const ir::Unit &unit, llvm::Type* Ty);
+
+ /*! Get the type size in bytes */
+ uint32_t getTypeByteSize(const ir::Unit &unit, llvm::Type* Ty);
+
+ /*! Create a Gen-IR unit */
+ llvm::FunctionPass *createGenPass(ir::Unit &unit);
+
+ /*! Remove the GEP instructions */
+ llvm::BasicBlockPass *createRemoveGEPPass(const ir::Unit &unit);
+
+} /* namespace gbe */
+
+#endif /* __GBE_LLVM_GEN_BACKEND_HPP__ */
+
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
new file mode 100644
index 0000000..551db3c
--- /dev/null
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -0,0 +1,42 @@
+DECL_LLVM_GEN_FUNCTION(GET_GROUP_ID0, __gen_ocl_get_group_id0)
+DECL_LLVM_GEN_FUNCTION(GET_GROUP_ID1, __gen_ocl_get_group_id1)
+DECL_LLVM_GEN_FUNCTION(GET_GROUP_ID2, __gen_ocl_get_group_id2)
+DECL_LLVM_GEN_FUNCTION(GET_LOCAL_ID0, __gen_ocl_get_local_id0)
+DECL_LLVM_GEN_FUNCTION(GET_LOCAL_ID1, __gen_ocl_get_local_id1)
+DECL_LLVM_GEN_FUNCTION(GET_LOCAL_ID2, __gen_ocl_get_local_id2)
+DECL_LLVM_GEN_FUNCTION(GET_NUM_GROUPS0, __gen_ocl_get_num_groups0)
+DECL_LLVM_GEN_FUNCTION(GET_NUM_GROUPS1, __gen_ocl_get_num_groups1)
+DECL_LLVM_GEN_FUNCTION(GET_NUM_GROUPS2, __gen_ocl_get_num_groups2)
+DECL_LLVM_GEN_FUNCTION(GET_LOCAL_SIZE0, __gen_ocl_get_local_size0)
+DECL_LLVM_GEN_FUNCTION(GET_LOCAL_SIZE1, __gen_ocl_get_local_size1)
+DECL_LLVM_GEN_FUNCTION(GET_LOCAL_SIZE2, __gen_ocl_get_local_size2)
+DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_SIZE0, __gen_ocl_get_global_size0)
+DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_SIZE1, __gen_ocl_get_global_size1)
+DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_SIZE2, __gen_ocl_get_global_size2)
+DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_OFFSET0, __gen_ocl_get_global_offset0)
+DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_OFFSET1, __gen_ocl_get_global_offset1)
+DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_OFFSET2, __gen_ocl_get_global_offset2)
+
+// Math function
+DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_fabs)
+DECL_LLVM_GEN_FUNCTION(COS, __gen_ocl_cos)
+DECL_LLVM_GEN_FUNCTION(SIN, __gen_ocl_sin)
+DECL_LLVM_GEN_FUNCTION(SQR, __gen_ocl_sqrt)
+DECL_LLVM_GEN_FUNCTION(RSQ, __gen_ocl_rsqrt)
+DECL_LLVM_GEN_FUNCTION(LOG, __gen_ocl_log)
+DECL_LLVM_GEN_FUNCTION(POW, __gen_ocl_pow)
+DECL_LLVM_GEN_FUNCTION(RCP, __gen_ocl_rcp)
+DECL_LLVM_GEN_FUNCTION(RNDZ, __gen_ocl_rndz)
+DECL_LLVM_GEN_FUNCTION(RNDE, __gen_ocl_rnde)
+DECL_LLVM_GEN_FUNCTION(RNDU, __gen_ocl_rndu)
+DECL_LLVM_GEN_FUNCTION(RNDD, __gen_ocl_rndd)
+
+// Barrier function
+DECL_LLVM_GEN_FUNCTION(LBARRIER, __gen_ocl_barrier_local)
+DECL_LLVM_GEN_FUNCTION(GBARRIER, __gen_ocl_barrier_global)
+DECL_LLVM_GEN_FUNCTION(LGBARRIER, __gen_ocl_barrier_local_and_global)
+
+// To force SIMD8/16 compilation
+DECL_LLVM_GEN_FUNCTION(FORCE_SIMD8, __gen_ocl_force_simd8)
+DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16, __gen_ocl_force_simd16)
+
diff --git a/backend/src/llvm/llvm_passes.cpp b/backend/src/llvm/llvm_passes.cpp
new file mode 100644
index 0000000..4881caa
--- /dev/null
+++ b/backend/src/llvm/llvm_passes.cpp
@@ -0,0 +1,354 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ * Heldge RHodin <alice.rhodin at alice-dsl.net>
+ */
+
+/**
+ * \file llvm_passes.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ * \author Heldge RHodin <alice.rhodin at alice-dsl.net>
+ */
+
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/ConstantsScanner.h"
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#if !defined(LLVM_VERSION_MAJOR) || (LLVM_VERSION_MINOR == 1)
+#include "llvm/Target/TargetData.h"
+#endif
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Config/config.h"
+
+#include "llvm/llvm_gen_backend.hpp"
+#include "ir/unit.hpp"
+#include "sys/map.hpp"
+
+using namespace llvm;
+
+namespace gbe
+{
+ uint32_t getPadding(uint32_t offset, uint32_t align) {
+ return (align - (offset % align)) % align;
+ }
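+  // Illustrative note (not in the original source): getPadding(6, 4) == 2,
+  // since two more bytes are needed to reach the next 4-byte boundary, while
+  // getPadding(8, 4) == 0 because the offset is already aligned.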
+
+ uint32_t getAlignmentByte(const ir::Unit &unit, Type* Ty)
+ {
+    const uint32_t MAX_ALIGN = 8; //maximum alignment is 8 bytes (for doubles)
+
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: NOT_SUPPORTED;
+ case Type::VectorTyID:
+ {
+ const VectorType* VecTy = cast<VectorType>(Ty);
+ uint32_t elemNum = VecTy->getNumElements();
+ if (elemNum == 3) elemNum = 4; // OCL spec
+ return elemNum * getTypeByteSize(unit, VecTy->getElementType());
+ }
+ case Type::PointerTyID:
+ case Type::IntegerTyID:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ return getTypeBitSize(unit, Ty)/8;
+ case Type::ArrayTyID:
+ return getAlignmentByte(unit, cast<ArrayType>(Ty)->getElementType());
+ case Type::StructTyID:
+ {
+ const StructType* StrTy = cast<StructType>(Ty);
+ uint32_t maxa = 0;
+ for(uint32_t subtype = 0; subtype < StrTy->getNumElements(); subtype++)
+ {
+ maxa = std::max(getAlignmentByte(unit, StrTy->getElementType(subtype)), maxa);
+ if(maxa==MAX_ALIGN)
+ return maxa;
+ }
+ return maxa;
+ }
+ default: NOT_SUPPORTED;
+ }
+ return 0u;
+ }
+
+ uint32_t getTypeBitSize(const ir::Unit &unit, Type* Ty)
+ {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: NOT_SUPPORTED;
+ case Type::PointerTyID: return unit.getPointerSize();
+ case Type::IntegerTyID: return cast<IntegerType>(Ty)->getBitWidth();
+ case Type::FloatTyID: return 32;
+ case Type::DoubleTyID: return 64;
+ case Type::VectorTyID:
+ {
+ const VectorType* VecTy = cast<VectorType>(Ty);
+ return VecTy->getNumElements() * getTypeBitSize(unit, VecTy->getElementType());
+ }
+ case Type::ArrayTyID:
+ {
+ const ArrayType* ArrTy = cast<ArrayType>(Ty);
+ Type* elementType = ArrTy->getElementType();
+ uint32_t size_element = getTypeBitSize(unit, elementType);
+ uint32_t size = ArrTy->getNumElements() * size_element;
+ uint32_t align = 8 * getAlignmentByte(unit, elementType);
+ size += (ArrTy->getNumElements()-1) * getPadding(size_element, align);
+ return size;
+ }
+ case Type::StructTyID:
+ {
+ const StructType* StrTy = cast<StructType>(Ty);
+ uint32_t size = 0;
+ for(uint32_t subtype=0; subtype < StrTy->getNumElements(); subtype++)
+ {
+ Type* elementType = StrTy->getElementType(subtype);
+ uint32_t align = 8 * getAlignmentByte(unit, elementType);
+ size += getPadding(size, align);
+ size += getTypeBitSize(unit, elementType);
+ }
+ return size;
+ }
+ default: NOT_SUPPORTED;
+ }
+ return 0u;
+ }
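+  // Worked example (illustrative, not in the original source): for a struct
+  // { char c; int i; } the StructTyID case above accumulates 8 bits for 'c',
+  // then 24 bits of padding to reach the 32-bit alignment of 'i', then 32
+  // bits for 'i', so getTypeBitSize returns 64.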
+
+ uint32_t getTypeByteSize(const ir::Unit &unit, Type* Ty)
+ {
+ uint32_t size_bit = getTypeBitSize(unit, Ty);
+ assert((size_bit%8==0) && "no multiple of 8");
+ return size_bit/8;
+ }
+
+ class GenRemoveGEPPasss : public BasicBlockPass
+ {
+
+ public:
+ static char ID;
+#define FORMER_VERSION 0
+#if FORMER_VERSION
+ GenRemoveGEPPasss(map<const Value *, const Value *>&
+ parentCompositePointer)
+ : BasicBlockPass(ID),
+ parentPointers(parentCompositePointer) {}
+ map<const Value *, const Value *>& parentPointers;
+#else
+ GenRemoveGEPPasss(const ir::Unit &unit) :
+ BasicBlockPass(ID),
+ unit(unit) {}
+ const ir::Unit &unit;
+#endif
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+
+ virtual const char *getPassName() const {
+ return "PTX backend: insert special ptx instructions";
+ }
+
+ bool simplifyGEPInstructions(GetElementPtrInst* GEPInst);
+
+    virtual bool runOnBasicBlock(BasicBlock &BB)
+    {
+      bool changedBlock = false;
+      // Walk the block with a look-ahead iterator: simplifyGEPInstructions
+      // removes the current instruction from the block, so the next iterator
+      // must be taken before processing. The terminator (last instruction)
+      // can never be a GEP and is deliberately skipped.
+      iplist<Instruction>::iterator I = BB.getInstList().begin();
+      for (auto nextI = I, E = --BB.getInstList().end(); I != E; I = nextI) {
+        iplist<Instruction>::iterator curI = nextI++;
+        if(GetElementPtrInst* gep = dyn_cast<GetElementPtrInst>(&*curI))
+          changedBlock = (simplifyGEPInstructions(gep) || changedBlock);
+      }
+      return changedBlock;
+    }
+ };
+
+ char GenRemoveGEPPasss::ID = 0;
+
+ bool GenRemoveGEPPasss::simplifyGEPInstructions(GetElementPtrInst* GEPInst)
+ {
+ const uint32_t ptrSize = unit.getPointerSize();
+ Value* parentPointer = GEPInst->getOperand(0);
+#if FORMER_VERSION
+ Value* topParent = parentPointer;
+#endif
+ CompositeType* CompTy = cast<CompositeType>(parentPointer->getType());
+
+ if(isa<GlobalVariable>(parentPointer)) //HACK: !!!!
+ {
+#if 1//FORMER_VERSION
+ Function *constWrapper =
+ Function::Create(FunctionType::get(parentPointer->getType(),true),
+ GlobalValue::ExternalLinkage,
+ Twine("__gen_ocl_const_wrapper"));
+
+ llvm::ArrayRef<Value*> params(parentPointer);
+ // params.push_back(parentPointer);
+
+ //create and insert wrapper call
+ CallInst * wrapperCall =
+ CallInst::Create(constWrapper,params,"",GEPInst);
+ parentPointer = wrapperCall;
+#else
+ // NOT_IMPLEMENTED;
+#endif
+ }
+
+ Value* currentAddrInst =
+ new PtrToIntInst(parentPointer, IntegerType::get(GEPInst->getContext(), ptrSize), "", GEPInst);
+
+ uint32_t constantOffset = 0;
+
+ for(uint32_t op=1; op<GEPInst->getNumOperands(); ++op)
+ {
+ uint32_t TypeIndex;
+      //we have a constant struct/array access
+ if(ConstantInt* ConstOP = dyn_cast<ConstantInt>(GEPInst->getOperand(op)))
+ {
+ uint32_t offset = 0;
+ TypeIndex = ConstOP->getZExtValue();
+ for(uint32_t ty_i=0; ty_i<TypeIndex; ty_i++)
+ {
+ Type* elementType = CompTy->getTypeAtIndex(ty_i);
+ uint32_t align = getAlignmentByte(unit, elementType);
+ offset += getPadding(offset, align);
+ offset += getTypeByteSize(unit, elementType);
+ }
+
+        //add padding for the accessed type
+ const uint32_t align = getAlignmentByte(unit, CompTy->getTypeAtIndex(TypeIndex));
+ offset += getPadding(offset, align);
+
+ constantOffset += offset;
+ }
+      // non-constant index (=> only array/vector allowed)
+ else
+ {
+ // we only have array/vectors here,
+ // therefore all elements have the same size
+ TypeIndex = 0;
+
+ Type* elementType = CompTy->getTypeAtIndex(TypeIndex);
+ uint32_t size = getTypeByteSize(unit, elementType);
+
+ //add padding
+ uint32_t align = getAlignmentByte(unit, elementType);
+ size += getPadding(size, align);
+
+ Constant* newConstSize =
+ ConstantInt::get(IntegerType::get(GEPInst->getContext(), ptrSize), size);
+
+ Value *operand = GEPInst->getOperand(op);
+
+        //HACK TODO: inserted by type replacement; this code could break something
+ if(getTypeByteSize(unit, operand->getType())>4)
+ {
+ GBE_ASSERTM(false, "CHECK IT");
+ operand->dump();
+
+ //previous instruction is sext or zext instr. ignore it
+ CastInst *cast = dyn_cast<CastInst>(operand);
+ if(cast && (isa<ZExtInst>(operand) || isa<SExtInst>(operand)))
+ {
+ //hope that CastInst is a s/zext
+ operand = cast->getOperand(0);
+ }
+ else
+ {
+          //truncate
+ operand =
+ new TruncInst(operand,
+ IntegerType::get(GEPInst->getContext(),
+ ptrSize),
+ "", GEPInst);
+ }
+ }
+
+ BinaryOperator* tmpMul =
+ BinaryOperator::Create(Instruction::Mul, newConstSize, operand,
+ "", GEPInst);
+ currentAddrInst =
+ BinaryOperator::Create(Instruction::Add, currentAddrInst, tmpMul,
+ "", GEPInst);
+ }
+
+      //step down in the type hierarchy
+ CompTy = dyn_cast<CompositeType>(CompTy->getTypeAtIndex(TypeIndex));
+ }
+
+ //insert addition of new offset before GEPInst
+ Constant* newConstOffset =
+ ConstantInt::get(IntegerType::get(GEPInst->getContext(),
+ ptrSize),
+ constantOffset);
+ currentAddrInst =
+ BinaryOperator::Create(Instruction::Add, currentAddrInst,
+ newConstOffset, "", GEPInst);
+
+ //convert offset to ptr type (nop)
+ IntToPtrInst* intToPtrInst =
+ new IntToPtrInst(currentAddrInst,GEPInst->getType(),"", GEPInst);
+
+ //replace uses of the GEP instruction with the newly calculated pointer
+ GEPInst->replaceAllUsesWith(intToPtrInst);
+ GEPInst->dropAllReferences();
+ GEPInst->removeFromParent();
+
+#if FORMER_VERSION
+ //insert new pointer into parent list
+ while(parentPointers.find(topParent)!=parentPointers.end())
+ topParent = parentPointers.find(topParent)->second;
+ parentPointers[intToPtrInst] = topParent;
+#endif
+
+ return true;
+ }
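+
+  // Worked example (illustrative, not in the original source): for a GEP such
+  // as "getelementptr {float,int}* %p, i32 %i, i32 1", the code above emits
+  // ptrtoint(%p), adds %i * 8 (the padded byte size of the struct) for the
+  // first index, folds the 4-byte offset of field 1 into constantOffset, adds
+  // that constant, and finally rebuilds the pointer with inttoptr before
+  // replacing all uses of the original GEP.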
+
+ BasicBlockPass *createRemoveGEPPass(const ir::Unit &unit) {
+ return new GenRemoveGEPPasss(unit);
+ }
+} /* namespace gbe */
+
diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
new file mode 100644
index 0000000..21193a5
--- /dev/null
+++ b/backend/src/llvm/llvm_to_gen.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file llvm_to_gen.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/IRReader.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Assembly/PrintModulePass.h"
+
+#include "llvm/llvm_gen_backend.hpp"
+#include "llvm/llvm_to_gen.hpp"
+#include "sys/cvar.hpp"
+#include "sys/platform.hpp"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <memory>
+
+namespace gbe
+{
+ BVAR(OCL_OUTPUT_LLVM, false);
+ BVAR(OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS, false);
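+  // Note (assumption about the cvar mechanism declared in sys/cvar.hpp):
+  // these BVARs are typically driven by environment variables of the same
+  // name, e.g. OCL_OUTPUT_LLVM=1 to dump the module after the extra passes
+  // below and OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS=1 to dump it before them.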
+
+ bool llvmToGen(ir::Unit &unit, const char *fileName)
+ {
+ using namespace llvm;
+
+ // Get the global LLVM context
+ llvm::LLVMContext& c = llvm::getGlobalContext();
+ std::string errInfo;
+    std::unique_ptr<llvm::raw_fd_ostream> o;
+ if (OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS || OCL_OUTPUT_LLVM)
+ o = std::unique_ptr<llvm::raw_fd_ostream>(new llvm::raw_fd_ostream("-", errInfo));
+
+ // Get the module from its file
+ SMDiagnostic Err;
+    std::unique_ptr<Module> M;
+ M.reset(ParseIRFile(fileName, Err, c));
+ if (M.get() == 0) return false;
+ Module &mod = *M.get();
+
+ llvm::PassManager passes;
+
+ // Print the code before further optimizations
+ if (OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS)
+ passes.add(createPrintModulePass(&*o));
+ passes.add(createScalarReplAggregatesPass()); // Break up allocas
+ passes.add(createRemoveGEPPass(unit));
+ passes.add(createConstantPropagationPass());
+ passes.add(createDeadInstEliminationPass()); // Remove simplified instructions
+ passes.add(createLowerSwitchPass());
+ passes.add(createPromoteMemoryToRegisterPass());
+ passes.add(createGVNPass()); // Remove redundancies
+ passes.add(createGenPass(unit));
+
+    // Print the code after the extra optimization passes
+ if (OCL_OUTPUT_LLVM)
+ passes.add(createPrintModulePass(&*o));
+ passes.run(mod);
+
+ // raw_fd_ostream closes stdout. We must reopen it
+ if (OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS || OCL_OUTPUT_LLVM) {
+      o.reset();
+ const int fd = open("/dev/tty", O_WRONLY);
+ stdout = fdopen(fd, "w");
+ }
+
+ return true;
+ }
+} /* namespace gbe */
+
diff --git a/backend/src/llvm/llvm_to_gen.hpp b/backend/src/llvm/llvm_to_gen.hpp
new file mode 100644
index 0000000..4006667
--- /dev/null
+++ b/backend/src/llvm/llvm_to_gen.hpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file llvm_to_gen.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#ifndef __GBE_IR_LLVM_TO_GEN_HPP__
+#define __GBE_IR_LLVM_TO_GEN_HPP__
+
+namespace gbe {
+ namespace ir {
+ // The code is output into an IR unit
+ class Unit;
+ } /* namespace ir */
+
+ /*! Convert the LLVM IR code to a GEN IR code */
+ bool llvmToGen(ir::Unit &unit, const char *fileName);
+
+} /* namespace gbe */
+
+#endif /* __GBE_IR_LLVM_TO_GEN_HPP__ */
+
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
new file mode 100644
index 0000000..8990e27
--- /dev/null
+++ b/backend/src/ocl_stdlib.h
@@ -0,0 +1,469 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __GEN_OCL_STDLIB_H__
+#define __GEN_OCL_STDLIB_H__
+
+#define INLINE __attribute__((always_inline)) inline
+#define OVERLOADABLE __attribute__((overloadable))
+#define PURE __attribute__((pure))
+#define CONST __attribute__((const))
+
+/////////////////////////////////////////////////////////////////////////////
+// OpenCL basic types
+/////////////////////////////////////////////////////////////////////////////
+typedef unsigned int uint;
+typedef unsigned int size_t;
+typedef float float2 __attribute__((ext_vector_type(2)));
+typedef float float3 __attribute__((ext_vector_type(3)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef float float8 __attribute__((ext_vector_type(8)));
+typedef float float16 __attribute__((ext_vector_type(16)));
+typedef int int2 __attribute__((ext_vector_type(2)));
+typedef int int3 __attribute__((ext_vector_type(3)));
+typedef int int4 __attribute__((ext_vector_type(4)));
+typedef int int8 __attribute__((ext_vector_type(8)));
+typedef int int16 __attribute__((ext_vector_type(16)));
+typedef unsigned int uint2 __attribute__((ext_vector_type(2)));
+typedef unsigned uint3 __attribute__((ext_vector_type(3)));
+typedef unsigned uint4 __attribute__((ext_vector_type(4)));
+typedef unsigned uint8 __attribute__((ext_vector_type(8)));
+typedef unsigned uint16 __attribute__((ext_vector_type(16)));
+typedef bool bool2 __attribute__((ext_vector_type(2)));
+typedef bool bool3 __attribute__((ext_vector_type(3)));
+typedef bool bool4 __attribute__((ext_vector_type(4)));
+typedef bool bool8 __attribute__((ext_vector_type(8)));
+typedef bool bool16 __attribute__((ext_vector_type(16)));
+
+/////////////////////////////////////////////////////////////////////////////
+// OpenCL address space
+/////////////////////////////////////////////////////////////////////////////
+#define __private __attribute__((address_space(0)))
+#define __global __attribute__((address_space(1)))
+#define __constant __attribute__((address_space(2)))
+#define __local __attribute__((address_space(4)))
+#define global __global
+//#define local __local
+#define constant __constant
+#define private __private
+
+/////////////////////////////////////////////////////////////////////////////
+// Work Items functions (see 6.11.1 of OCL 1.1 spec)
+/////////////////////////////////////////////////////////////////////////////
+// TODO get_global_offset
+// TODO get_work_dim
+
+#define DECL_INTERNAL_WORK_ITEM_FN(NAME) \
+PURE CONST unsigned int __gen_ocl_##NAME##0(void); \
+PURE CONST unsigned int __gen_ocl_##NAME##1(void); \
+PURE CONST unsigned int __gen_ocl_##NAME##2(void);
+DECL_INTERNAL_WORK_ITEM_FN(get_group_id)
+DECL_INTERNAL_WORK_ITEM_FN(get_local_id)
+DECL_INTERNAL_WORK_ITEM_FN(get_local_size)
+DECL_INTERNAL_WORK_ITEM_FN(get_global_size)
+DECL_INTERNAL_WORK_ITEM_FN(get_num_groups)
+#undef DECL_INTERNAL_WORK_ITEM_FN
+
+#define DECL_PUBLIC_WORK_ITEM_FN(NAME) \
+inline unsigned NAME(unsigned int dim) { \
+ if (dim == 0) return __gen_ocl_##NAME##0(); \
+ else if (dim == 1) return __gen_ocl_##NAME##1(); \
+ else if (dim == 2) return __gen_ocl_##NAME##2(); \
+ else return 0; \
+}
+DECL_PUBLIC_WORK_ITEM_FN(get_group_id)
+DECL_PUBLIC_WORK_ITEM_FN(get_local_id)
+DECL_PUBLIC_WORK_ITEM_FN(get_local_size)
+DECL_PUBLIC_WORK_ITEM_FN(get_global_size)
+DECL_PUBLIC_WORK_ITEM_FN(get_num_groups)
+#undef DECL_PUBLIC_WORK_ITEM_FN
+
+INLINE uint get_global_id(uint dim) {
+ return get_local_id(dim) + get_local_size(dim) * get_group_id(dim);
+}
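+
+// Illustrative example (not in the original header): with get_local_size(0)
+// == 16, work item 3 of work group 2 gets get_global_id(0) == 3 + 16*2 == 35.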
+
+/////////////////////////////////////////////////////////////////////////////
+// Math Functions (see 6.11.2 of OCL 1.1 spec)
+/////////////////////////////////////////////////////////////////////////////
+PURE CONST float __gen_ocl_fabs(float x);
+PURE CONST float __gen_ocl_sin(float x);
+PURE CONST float __gen_ocl_cos(float x);
+PURE CONST float __gen_ocl_sqrt(float x);
+PURE CONST float __gen_ocl_rsqrt(float x);
+PURE CONST float __gen_ocl_log(float x);
+PURE CONST float __gen_ocl_pow(float x, float y);
+PURE CONST float __gen_ocl_rcp(float x);
+PURE CONST float __gen_ocl_rndz(float x);
+PURE CONST float __gen_ocl_rnde(float x);
+PURE CONST float __gen_ocl_rndu(float x);
+PURE CONST float __gen_ocl_rndd(float x);
+INLINE OVERLOADABLE float native_cos(float x) { return __gen_ocl_cos(x); }
+INLINE OVERLOADABLE float native_sin(float x) { return __gen_ocl_sin(x); }
+INLINE OVERLOADABLE float native_sqrt(float x) { return __gen_ocl_sqrt(x); }
+INLINE OVERLOADABLE float native_rsqrt(float x) { return __gen_ocl_rsqrt(x); }
+INLINE OVERLOADABLE float native_log2(float x) { return __gen_ocl_log(x); }
+INLINE OVERLOADABLE float native_log(float x) {
+ return native_log2(x) * 0.6931472002f;
+}
+INLINE OVERLOADABLE float native_log10(float x) {
+ return native_log2(x) * 0.3010299956f;
+}
+INLINE OVERLOADABLE float native_powr(float x, float y) { return __gen_ocl_pow(x,y); }
+INLINE OVERLOADABLE float native_recip(float x) { return __gen_ocl_rcp(x); }
+INLINE OVERLOADABLE float native_tan(float x) {
+ return native_sin(x) / native_cos(x);
+}
+#define E 2.71828182845904523536f
+INLINE OVERLOADABLE float native_exp(float x) { return native_powr(E, x); }
+#undef E
+
+// XXX work-around PTX profile
+#define sqrt native_sqrt
+INLINE OVERLOADABLE float rsqrt(float x) { return native_rsqrt(x); }
+INLINE OVERLOADABLE float __gen_ocl_internal_fabs(float x) { return __gen_ocl_fabs(x); }
+INLINE OVERLOADABLE float __gen_ocl_internal_trunc(float x) { return __gen_ocl_rndz(x); }
+INLINE OVERLOADABLE float __gen_ocl_internal_round(float x) { return __gen_ocl_rnde(x); }
+INLINE OVERLOADABLE float __gen_ocl_internal_floor(float x) { return __gen_ocl_rndd(x); }
+INLINE OVERLOADABLE float __gen_ocl_internal_ceil(float x) { return __gen_ocl_rndu(x); }
+INLINE OVERLOADABLE float __gen_ocl_internal_log(float x) { return native_log(x); }
+INLINE OVERLOADABLE float __gen_ocl_internal_log2(float x) { return native_log2(x); }
+INLINE OVERLOADABLE float __gen_ocl_internal_log10(float x) { return native_log10(x); }
+INLINE OVERLOADABLE float __gen_ocl_internal_exp(float x) { return native_exp(x); }
+INLINE OVERLOADABLE float powr(float x, float y) { return __gen_ocl_pow(x,y); }
+INLINE OVERLOADABLE float fmod(float x, float y) { return x-y*__gen_ocl_rndz(x/y); }
+
+// TODO use llvm intrinsics definitions
+#define cos native_cos
+#define sin native_sin
+#define pow powr
+
+INLINE OVERLOADABLE float mad(float a, float b, float c) {
+ return a*b+c;
+}
+
+INLINE OVERLOADABLE uint select(uint src0, uint src1, uint cond) {
+ return cond ? src1 : src0;
+}
+INLINE OVERLOADABLE int select(int src0, int src1, int cond) {
+ return cond ? src1 : src0;
+}
+INLINE OVERLOADABLE float select(float src0, float src1, int cond) {
+ return cond ? src1 : src0;
+}
+
+// This will be optimized out by LLVM and will output LLVM select instructions
+#define DECL_SELECT4(TYPE4, TYPE, COND_TYPE4, MASK) \
+INLINE OVERLOADABLE TYPE4 select(TYPE4 src0, TYPE4 src1, COND_TYPE4 cond) { \
+ TYPE4 dst; \
+ const TYPE x0 = src0.x; /* Fix performance issue with CLANG */ \
+ const TYPE x1 = src1.x; \
+ const TYPE y0 = src0.y; \
+ const TYPE y1 = src1.y; \
+ const TYPE z0 = src0.z; \
+ const TYPE z1 = src1.z; \
+ const TYPE w0 = src0.w; \
+ const TYPE w1 = src1.w; \
+ dst.x = (cond.x & MASK) ? x1 : x0; \
+ dst.y = (cond.y & MASK) ? y1 : y0; \
+ dst.z = (cond.z & MASK) ? z1 : z0; \
+ dst.w = (cond.w & MASK) ? w1 : w0; \
+ return dst; \
+}
+DECL_SELECT4(int4, int, int4, 0x80000000)
+DECL_SELECT4(float4, float, int4, 0x80000000)
+#undef DECL_SELECT4
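+
+// Illustrative example (the MASK above is the sign bit because, for vector
+// arguments, OpenCL's select picks from src1 wherever the MSB of cond is set):
+// select((int4)(1,2,3,4), (int4)(5,6,7,8), (int4)(0,-1,0,-1)) == (int4)(1,6,3,8).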
+
+/////////////////////////////////////////////////////////////////////////////
+// Common Functions (see 6.11.4 of OCL 1.1 spec)
+/////////////////////////////////////////////////////////////////////////////
+#define DECL_MIN_MAX(TYPE) \
+INLINE OVERLOADABLE TYPE max(TYPE a, TYPE b) { \
+ return a > b ? a : b; \
+} \
+INLINE OVERLOADABLE TYPE min(TYPE a, TYPE b) { \
+ return a < b ? a : b; \
+}
+DECL_MIN_MAX(float)
+DECL_MIN_MAX(int)
+DECL_MIN_MAX(short)
+DECL_MIN_MAX(char)
+DECL_MIN_MAX(uint)
+DECL_MIN_MAX(unsigned short)
+DECL_MIN_MAX(unsigned char)
+#undef DECL_MIN_MAX
+
+INLINE OVERLOADABLE float __gen_ocl_internal_fmax(float a, float b) { return max(a,b); }
+INLINE OVERLOADABLE float __gen_ocl_internal_fmin(float a, float b) { return min(a,b); }
+INLINE OVERLOADABLE float mix(float x, float y, float a) { return x + (y-x)*a;}
+
+/////////////////////////////////////////////////////////////////////////////
+// Geometric functions (see 6.11.5 of OCL 1.1 spec)
+/////////////////////////////////////////////////////////////////////////////
+INLINE OVERLOADABLE float dot(float2 p0, float2 p1) {
+ return mad(p0.x,p1.x,p0.y*p1.y);
+}
+INLINE OVERLOADABLE float dot(float3 p0, float3 p1) {
+ return mad(p0.x,p1.x,mad(p0.z,p1.z,p0.y*p1.y));
+}
+INLINE OVERLOADABLE float dot(float4 p0, float4 p1) {
+ return mad(p0.x,p1.x,mad(p0.w,p1.w,mad(p0.z,p1.z,p0.y*p1.y)));
+}
+
+INLINE OVERLOADABLE float dot(float8 p0, float8 p1) {
+ return mad(p0.x,p1.x,mad(p0.s7,p1.s7, mad(p0.s6,p1.s6,mad(p0.s5,p1.s5,
+ mad(p0.s4,p1.s4,mad(p0.w,p1.w, mad(p0.z,p1.z,p0.y*p1.y)))))));
+}
+INLINE OVERLOADABLE float dot(float16 p0, float16 p1) {
+ return mad(p0.sc,p1.sc,mad(p0.sd,p1.sd,mad(p0.se,p1.se,mad(p0.sf,p1.sf,
+ mad(p0.s8,p1.s8,mad(p0.s9,p1.s9,mad(p0.sa,p1.sa,mad(p0.sb,p1.sb,
+ mad(p0.x,p1.x,mad(p0.s7,p1.s7, mad(p0.s6,p1.s6,mad(p0.s5,p1.s5,
+ mad(p0.s4,p1.s4,mad(p0.w,p1.w, mad(p0.z,p1.z,p0.y*p1.y)))))))))))))));
+}
+
+INLINE OVERLOADABLE float length(float x) { return __gen_ocl_fabs(x); }
+INLINE OVERLOADABLE float length(float2 x) { return sqrt(dot(x,x)); }
+INLINE OVERLOADABLE float length(float3 x) { return sqrt(dot(x,x)); }
+INLINE OVERLOADABLE float length(float4 x) { return sqrt(dot(x,x)); }
+INLINE OVERLOADABLE float length(float8 x) { return sqrt(dot(x,x)); }
+INLINE OVERLOADABLE float length(float16 x) { return sqrt(dot(x,x)); }
+INLINE OVERLOADABLE float distance(float x, float y) { return length(x-y); }
+INLINE OVERLOADABLE float distance(float2 x, float2 y) { return length(x-y); }
+INLINE OVERLOADABLE float distance(float3 x, float3 y) { return length(x-y); }
+INLINE OVERLOADABLE float distance(float4 x, float4 y) { return length(x-y); }
+INLINE OVERLOADABLE float distance(float8 x, float8 y) { return length(x-y); }
+INLINE OVERLOADABLE float distance(float16 x, float16 y) { return length(x-y); }
+INLINE OVERLOADABLE float normalize(float x) { return 1.f; }
+INLINE OVERLOADABLE float2 normalize(float2 x) { return x * rsqrt(dot(x, x)); }
+INLINE OVERLOADABLE float3 normalize(float3 x) { return x * rsqrt(dot(x, x)); }
+INLINE OVERLOADABLE float4 normalize(float4 x) { return x * rsqrt(dot(x, x)); }
+INLINE OVERLOADABLE float8 normalize(float8 x) { return x * rsqrt(dot(x, x)); }
+INLINE OVERLOADABLE float16 normalize(float16 x) { return x * rsqrt(dot(x, x)); }
+
+INLINE OVERLOADABLE float fast_length(float x) { return __gen_ocl_fabs(x); }
+INLINE OVERLOADABLE float fast_length(float2 x) { return sqrt(dot(x,x)); }
+INLINE OVERLOADABLE float fast_length(float3 x) { return sqrt(dot(x,x)); }
+INLINE OVERLOADABLE float fast_length(float4 x) { return sqrt(dot(x,x)); }
+INLINE OVERLOADABLE float fast_length(float8 x) { return sqrt(dot(x,x)); }
+INLINE OVERLOADABLE float fast_length(float16 x) { return sqrt(dot(x,x)); }
+INLINE OVERLOADABLE float fast_distance(float x, float y) { return length(x-y); }
+INLINE OVERLOADABLE float fast_distance(float2 x, float2 y) { return length(x-y); }
+INLINE OVERLOADABLE float fast_distance(float3 x, float3 y) { return length(x-y); }
+INLINE OVERLOADABLE float fast_distance(float4 x, float4 y) { return length(x-y); }
+INLINE OVERLOADABLE float fast_distance(float8 x, float8 y) { return length(x-y); }
+INLINE OVERLOADABLE float fast_distance(float16 x, float16 y) { return length(x-y); }
+INLINE OVERLOADABLE float fast_normalize(float x) { return 1.f; }
+INLINE OVERLOADABLE float2 fast_normalize(float2 x) { return x * rsqrt(dot(x, x)); }
+INLINE OVERLOADABLE float3 fast_normalize(float3 x) { return x * rsqrt(dot(x, x)); }
+INLINE OVERLOADABLE float4 fast_normalize(float4 x) { return x * rsqrt(dot(x, x)); }
+INLINE OVERLOADABLE float8 fast_normalize(float8 x) { return x * rsqrt(dot(x, x)); }
+INLINE OVERLOADABLE float16 fast_normalize(float16 x) { return x * rsqrt(dot(x, x)); }
+
+INLINE OVERLOADABLE float3 cross(float3 v0, float3 v1) {
+ return v0.yzx*v1.zxy-v0.zxy*v1.yzx;
+}
+INLINE OVERLOADABLE float4 cross(float4 v0, float4 v1) {
+ return (float4)(v0.yzx*v1.zxy-v0.zxy*v1.yzx, 0.f);
+}
+
+/////////////////////////////////////////////////////////////////////////////
+// Vector loads and stores
+/////////////////////////////////////////////////////////////////////////////
+
+// These loads and stores will use untyped reads and writes, so we can just
+// cast to vector loads / stores. Not C99 compliant BTW due to aliasing issue.
+// Well we do not care, we do not activate TBAA in the compiler
+#define DECL_UNTYPED_RW_SPACE_N(TYPE, DIM, SPACE) \
+INLINE OVERLOADABLE TYPE##DIM vload##DIM(size_t offset, const SPACE TYPE *p) { \
+ return *(SPACE TYPE##DIM *) (p + DIM * offset); \
+} \
+INLINE OVERLOADABLE void vstore##DIM(TYPE##DIM v, size_t offset, SPACE TYPE *p) { \
+ *(SPACE TYPE##DIM *) (p + DIM * offset) = v; \
+}
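+// For instance (illustrative expansion, not in the original header),
+// DECL_UNTYPED_RW_SPACE_N(float, 4, __global) declares vload4/vstore4
+// overloads that cast the scalar float pointer to a __global float4* and
+// dereference it directly, relying on the untyped reads/writes noted above.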
+
+#define DECL_UNTYPED_RW_ALL_SPACE(TYPE, SPACE) \
+ DECL_UNTYPED_RW_SPACE_N(TYPE, 2, SPACE) \
+ DECL_UNTYPED_RW_SPACE_N(TYPE, 3, SPACE) \
+ DECL_UNTYPED_RW_SPACE_N(TYPE, 4, SPACE) \
+ DECL_UNTYPED_RW_SPACE_N(TYPE, 8, SPACE) \
+ DECL_UNTYPED_RW_SPACE_N(TYPE, 16, SPACE)
+
+#define DECL_UNTYPED_RW_ALL(TYPE) \
+ DECL_UNTYPED_RW_ALL_SPACE(TYPE, __global) \
+ DECL_UNTYPED_RW_ALL_SPACE(TYPE, __local) \
+ DECL_UNTYPED_RW_ALL_SPACE(TYPE, __constant) \
+ DECL_UNTYPED_RW_ALL_SPACE(TYPE, __private)
+
+DECL_UNTYPED_RW_ALL(float)
+DECL_UNTYPED_RW_ALL(uint)
+DECL_UNTYPED_RW_ALL(int)
+
+#undef DECL_UNTYPED_RW_ALL
+#undef DECL_UNTYPED_RW_ALL_SPACE
+#undef DECL_UNTYPED_RW_SPACE_N
+
+/////////////////////////////////////////////////////////////////////////////
+// Declare functions for vector types which are derived from scalar ones
+/////////////////////////////////////////////////////////////////////////////
+#define DECL_VECTOR_1OP(NAME, TYPE) \
+ INLINE OVERLOADABLE TYPE##2 NAME(TYPE##2 v) { \
+ return (TYPE##2)(NAME(v.x), NAME(v.y)); \
+ }\
+ INLINE OVERLOADABLE TYPE##3 NAME(TYPE##3 v) { \
+ return (TYPE##3)(NAME(v.x), NAME(v.y), NAME(v.z)); \
+ }\
+ INLINE OVERLOADABLE TYPE##4 NAME(TYPE##4 v) { \
+ return (TYPE##4)(NAME(v.x), NAME(v.y), NAME(v.z), NAME(v.w)); \
+ }\
+ INLINE OVERLOADABLE TYPE##8 NAME(TYPE##8 v) { \
+ TYPE##8 dst;\
+ dst.s0123 = NAME(v.s0123);\
+ dst.s4567 = NAME(v.s4567);\
+ return dst;\
+ }\
+ INLINE OVERLOADABLE TYPE##16 NAME(TYPE##16 v) { \
+ TYPE##16 dst;\
+ dst.s01234567 = NAME(v.s01234567);\
+ dst.s89abcdef = NAME(v.s89abcdef);\
+ return dst;\
+ }
+DECL_VECTOR_1OP(native_cos, float);
+DECL_VECTOR_1OP(native_sin, float);
+DECL_VECTOR_1OP(native_tan, float);
+DECL_VECTOR_1OP(native_sqrt, float);
+DECL_VECTOR_1OP(native_rsqrt, float);
+DECL_VECTOR_1OP(native_log2, float);
+DECL_VECTOR_1OP(native_recip, float);
+DECL_VECTOR_1OP(__gen_ocl_internal_fabs, float);
+DECL_VECTOR_1OP(__gen_ocl_internal_trunc, float);
+DECL_VECTOR_1OP(__gen_ocl_internal_round, float);
+DECL_VECTOR_1OP(__gen_ocl_internal_floor, float);
+DECL_VECTOR_1OP(__gen_ocl_internal_ceil, float);
+DECL_VECTOR_1OP(__gen_ocl_internal_log, float);
+DECL_VECTOR_1OP(__gen_ocl_internal_log2, float);
+DECL_VECTOR_1OP(__gen_ocl_internal_log10, float);
+#undef DECL_VECTOR_1OP
+
+#define DECL_VECTOR_2OP(NAME, TYPE) \
+ INLINE OVERLOADABLE TYPE##2 NAME(TYPE##2 v0, TYPE##2 v1) { \
+    return (TYPE##2)(NAME(v0.x, v1.x), NAME(v0.y, v1.y)); \
+ }\
+ INLINE OVERLOADABLE TYPE##3 NAME(TYPE##3 v0, TYPE##3 v1) { \
+ return (TYPE##3)(NAME(v0.x, v1.x), NAME(v0.y, v1.y), NAME(v0.z, v1.z)); \
+ }\
+ INLINE OVERLOADABLE TYPE##4 NAME(TYPE##4 v0, TYPE##4 v1) { \
+ return (TYPE##4)(NAME(v0.x, v1.x), NAME(v0.y, v1.y), NAME(v0.z, v1.z), NAME(v0.w, v1.w)); \
+ }\
+ INLINE OVERLOADABLE TYPE##8 NAME(TYPE##8 v0, TYPE##8 v1) { \
+ TYPE##8 dst;\
+ dst.s0123 = NAME(v0.s0123, v1.s0123);\
+ dst.s4567 = NAME(v0.s4567, v1.s4567);\
+ return dst;\
+ }\
+ INLINE OVERLOADABLE TYPE##16 NAME(TYPE##16 v0, TYPE##16 v1) { \
+ TYPE##16 dst;\
+ dst.s01234567 = NAME(v0.s01234567, v1.s01234567);\
+ dst.s89abcdef = NAME(v0.s89abcdef, v1.s89abcdef);\
+ return dst;\
+ }
+DECL_VECTOR_2OP(min, float);
+DECL_VECTOR_2OP(max, float);
+DECL_VECTOR_2OP(fmod, float);
+DECL_VECTOR_2OP(powr, float);
+#undef DECL_VECTOR_2OP
+
+#define DECL_VECTOR_3OP(NAME, TYPE) \
+ INLINE OVERLOADABLE TYPE##2 NAME(TYPE##2 v0, TYPE##2 v1, TYPE##2 v2) { \
+    return (TYPE##2)(NAME(v0.x, v1.x, v2.x), NAME(v0.y, v1.y, v2.y)); \
+ }\
+ INLINE OVERLOADABLE TYPE##3 NAME(TYPE##3 v0, TYPE##3 v1, TYPE##3 v2) { \
+ return (TYPE##3)(NAME(v0.x, v1.x, v2.x), NAME(v0.y, v1.y, v2.y), NAME(v0.z, v1.z, v2.z)); \
+ }\
+ INLINE OVERLOADABLE TYPE##4 NAME(TYPE##4 v0, TYPE##4 v1, TYPE##4 v2) { \
+ return (TYPE##4)(NAME(v0.x, v1.x, v2.x), NAME(v0.y, v1.y, v2.y), NAME(v0.z, v1.z, v2.z), NAME(v0.w, v1.w, v2.w)); \
+ }\
+ INLINE OVERLOADABLE TYPE##8 NAME(TYPE##8 v0, TYPE##8 v1, TYPE##8 v2) { \
+ TYPE##8 dst;\
+ dst.s0123 = NAME(v0.s0123, v1.s0123, v2.s0123);\
+ dst.s4567 = NAME(v0.s4567, v1.s4567, v2.s4567);\
+ return dst;\
+ }\
+ INLINE OVERLOADABLE TYPE##16 NAME(TYPE##16 v0, TYPE##16 v1, TYPE##16 v2) { \
+ TYPE##16 dst;\
+ dst.s01234567 = NAME(v0.s01234567, v1.s01234567, v2.s01234567);\
+ dst.s89abcdef = NAME(v0.s89abcdef, v1.s89abcdef, v2.s89abcdef);\
+ return dst;\
+ }
+DECL_VECTOR_3OP(mad, float);
+DECL_VECTOR_3OP(mix, float);
+#undef DECL_VECTOR_3OP
+
+// mix requires more variants
+INLINE OVERLOADABLE float2 mix(float2 x, float2 y, float a) { return mix(x,y,(float2)(a));}
+INLINE OVERLOADABLE float3 mix(float3 x, float3 y, float a) { return mix(x,y,(float3)(a));}
+INLINE OVERLOADABLE float4 mix(float4 x, float4 y, float a) { return mix(x,y,(float4)(a));}
+INLINE OVERLOADABLE float8 mix(float8 x, float8 y, float a) { return mix(x,y,(float8)(a));}
+INLINE OVERLOADABLE float16 mix(float16 x, float16 y, float a) { return mix(x,y,(float16)(a));}
+
+// XXX workaround ptx profile
+#define fabs __gen_ocl_internal_fabs
+#define trunc __gen_ocl_internal_trunc
+#define round __gen_ocl_internal_round
+#define floor __gen_ocl_internal_floor
+#define ceil __gen_ocl_internal_ceil
+#define log __gen_ocl_internal_log
+#define log2 __gen_ocl_internal_log2
+#define log10 __gen_ocl_internal_log10
+#define exp __gen_ocl_internal_exp
+#define fmin __gen_ocl_internal_fmin
+#define fmax __gen_ocl_internal_fmax
+
+/////////////////////////////////////////////////////////////////////////////
+// Synchronization functions
+/////////////////////////////////////////////////////////////////////////////
+#define CLK_LOCAL_MEM_FENCE (1 << 0)
+#define CLK_GLOBAL_MEM_FENCE (1 << 1)
+
+void __gen_ocl_barrier_local(void);
+void __gen_ocl_barrier_global(void);
+void __gen_ocl_barrier_local_and_global(void);
+
+typedef uint cl_mem_fence_flags;
+INLINE void barrier(cl_mem_fence_flags flags) {
+ if (flags == (CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE))
+ __gen_ocl_barrier_local_and_global();
+ else if (flags == CLK_LOCAL_MEM_FENCE)
+ __gen_ocl_barrier_local();
+ else if (flags == CLK_GLOBAL_MEM_FENCE)
+ __gen_ocl_barrier_global();
+}
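+
+// Typical use (illustrative): a kernel that stages data in __local memory
+// calls barrier(CLK_LOCAL_MEM_FENCE) between the writes and the later reads
+// so that every work item in the group sees the staged values.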
+
+/////////////////////////////////////////////////////////////////////////////
+// Force the compilation to SIMD8 or SIMD16
+/////////////////////////////////////////////////////////////////////////////
+
+int __gen_ocl_force_simd8(void);
+int __gen_ocl_force_simd16(void);
+
+#define NULL ((void*)0)
+#undef PURE
+#undef CONST
+#undef OVERLOADABLE
+#undef INLINE
+#endif /* __GEN_OCL_STDLIB_H__ */
+
diff --git a/backend/src/ocl_stdlib_str.cpp b/backend/src/ocl_stdlib_str.cpp
new file mode 100644
index 0000000..d17018f
--- /dev/null
+++ b/backend/src/ocl_stdlib_str.cpp
@@ -0,0 +1,475 @@
+#include "string"
+namespace gbe {
+std::string ocl_stdlib_str =
+"/* \n"
+"uint* Copyright © 2012 Intel Corporation\n"
+" *\n"
+" * This library is free software; you can redistribute it and/or\n"
+" * modify it under the terms of the GNU Lesser General Public\n"
+" * License as published by the Free Software Foundation; either\n"
+" * version 2 of the License, or (at your option) any later version.\n"
+" *\n"
+" * This library is distributed in the hope that it will be useful,\n"
+" * but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
+" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
+" * Lesser General Public License for more details.\n"
+" *\n"
+" * You should have received a copy of the GNU Lesser General Public\n"
+" * License along with this library. If not, see <http://www.gnu.org/licenses/>.\n"
+" *\n"
+" * Author: Benjamin Segovia <benjamin.segovia at intel.com>\n"
+" */\n"
+"\n"
+"#ifndef __GEN_OCL_STDLIB_H__\n"
+"#define __GEN_OCL_STDLIB_H__\n"
+"\n"
+"#define INLINE __attribute__((always_inline)) inline\n"
+"#define OVERLOADABLE __attribute__((overloadable))\n"
+"#define PURE __attribute__((pure))\n"
+"#define CONST __attribute__((const))\n"
+"\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"// OpenCL basic types\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"typedef unsigned int uint;\n"
+"typedef unsigned int size_t;\n"
+"typedef float float2 __attribute__((ext_vector_type(2)));\n"
+"typedef float float3 __attribute__((ext_vector_type(3)));\n"
+"typedef float float4 __attribute__((ext_vector_type(4)));\n"
+"typedef float float8 __attribute__((ext_vector_type(8)));\n"
+"typedef float float16 __attribute__((ext_vector_type(16)));\n"
+"typedef int int2 __attribute__((ext_vector_type(2)));\n"
+"typedef int int3 __attribute__((ext_vector_type(3)));\n"
+"typedef int int4 __attribute__((ext_vector_type(4)));\n"
+"typedef int int8 __attribute__((ext_vector_type(8)));\n"
+"typedef int int16 __attribute__((ext_vector_type(16)));\n"
+"typedef unsigned int uint2 __attribute__((ext_vector_type(2)));\n"
+"typedef unsigned uint3 __attribute__((ext_vector_type(3)));\n"
+"typedef unsigned uint4 __attribute__((ext_vector_type(4)));\n"
+"typedef unsigned uint8 __attribute__((ext_vector_type(8)));\n"
+"typedef unsigned uint16 __attribute__((ext_vector_type(16)));\n"
+"typedef bool bool2 __attribute__((ext_vector_type(2)));\n"
+"typedef bool bool3 __attribute__((ext_vector_type(3)));\n"
+"typedef bool bool4 __attribute__((ext_vector_type(4)));\n"
+"typedef bool bool8 __attribute__((ext_vector_type(8)));\n"
+"typedef bool bool16 __attribute__((ext_vector_type(16)));\n"
+"\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"// OpenCL address space\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"#define __private __attribute__((address_space(0)))\n"
+"#define __global __attribute__((address_space(1)))\n"
+"#define __constant __attribute__((address_space(2)))\n"
+"#define __local __attribute__((address_space(4)))\n"
+"#define global __global\n"
+"//#define local __local\n"
+"#define constant __constant\n"
+"#define private __private\n"
+"\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"// Work Items functions (see 6.11.1 of OCL 1.1 spec)\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"// TODO get_global_offset\n"
+"// TODO get_work_dim\n"
+"\n"
+"#define DECL_INTERNAL_WORK_ITEM_FN(NAME) \\\n"
+"PURE CONST unsigned int __gen_ocl_##NAME##0(void); \\\n"
+"PURE CONST unsigned int __gen_ocl_##NAME##1(void); \\\n"
+"PURE CONST unsigned int __gen_ocl_##NAME##2(void);\n"
+"DECL_INTERNAL_WORK_ITEM_FN(get_group_id)\n"
+"DECL_INTERNAL_WORK_ITEM_FN(get_local_id)\n"
+"DECL_INTERNAL_WORK_ITEM_FN(get_local_size)\n"
+"DECL_INTERNAL_WORK_ITEM_FN(get_global_size)\n"
+"DECL_INTERNAL_WORK_ITEM_FN(get_num_groups)\n"
+"#undef DECL_INTERNAL_WORK_ITEM_FN\n"
+"\n"
+"#define DECL_PUBLIC_WORK_ITEM_FN(NAME) \\\n"
+"inline unsigned NAME(unsigned int dim) { \\\n"
+" if (dim == 0) return __gen_ocl_##NAME##0(); \\\n"
+" else if (dim == 1) return __gen_ocl_##NAME##1(); \\\n"
+" else if (dim == 2) return __gen_ocl_##NAME##2(); \\\n"
+" else return 0; \\\n"
+"}\n"
+"DECL_PUBLIC_WORK_ITEM_FN(get_group_id)\n"
+"DECL_PUBLIC_WORK_ITEM_FN(get_local_id)\n"
+"DECL_PUBLIC_WORK_ITEM_FN(get_local_size)\n"
+"DECL_PUBLIC_WORK_ITEM_FN(get_global_size)\n"
+"DECL_PUBLIC_WORK_ITEM_FN(get_num_groups)\n"
+"#undef DECL_PUBLIC_WORK_ITEM_FN\n"
+"\n"
+"INLINE uint get_global_id(uint dim) {\n"
+" return get_local_id(dim) + get_local_size(dim) * get_group_id(dim);\n"
+"}\n"
+"\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"// Math Functions (see 6.11.2 of OCL 1.1 spec)\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"PURE CONST float __gen_ocl_fabs(float x);\n"
+"PURE CONST float __gen_ocl_sin(float x);\n"
+"PURE CONST float __gen_ocl_cos(float x);\n"
+"PURE CONST float __gen_ocl_sqrt(float x);\n"
+"PURE CONST float __gen_ocl_rsqrt(float x);\n"
+"PURE CONST float __gen_ocl_log(float x);\n"
+"PURE CONST float __gen_ocl_pow(float x, float y);\n"
+"PURE CONST float __gen_ocl_rcp(float x);\n"
+"PURE CONST float __gen_ocl_rndz(float x);\n"
+"PURE CONST float __gen_ocl_rnde(float x);\n"
+"PURE CONST float __gen_ocl_rndu(float x);\n"
+"PURE CONST float __gen_ocl_rndd(float x);\n"
+"INLINE OVERLOADABLE float native_cos(float x) { return __gen_ocl_cos(x); }\n"
+"INLINE OVERLOADABLE float native_sin(float x) { return __gen_ocl_sin(x); }\n"
+"INLINE OVERLOADABLE float native_sqrt(float x) { return __gen_ocl_sqrt(x); }\n"
+"INLINE OVERLOADABLE float native_rsqrt(float x) { return __gen_ocl_rsqrt(x); }\n"
+"INLINE OVERLOADABLE float native_log2(float x) { return __gen_ocl_log(x); }\n"
+"INLINE OVERLOADABLE float native_log(float x) {\n"
+" return native_log2(x) * 0.6931472002f;\n"
+"}\n"
+"INLINE OVERLOADABLE float native_log10(float x) {\n"
+" return native_log2(x) * 0.3010299956f;\n"
+"}\n"
+"INLINE OVERLOADABLE float native_powr(float x, float y) { return __gen_ocl_pow(x,y); }\n"
+"INLINE OVERLOADABLE float native_recip(float x) { return __gen_ocl_rcp(x); }\n"
+"INLINE OVERLOADABLE float native_tan(float x) {\n"
+" return native_sin(x) / native_cos(x);\n"
+"}\n"
+"#define E 2.71828182845904523536f\n"
+"INLINE OVERLOADABLE float native_exp(float x) { return native_powr(E, x); }\n"
+"#undef E\n"
+"\n"
+"// XXX work-around PTX profile\n"
+"#define sqrt native_sqrt\n"
+"INLINE OVERLOADABLE float rsqrt(float x) { return native_rsqrt(x); }\n"
+"INLINE OVERLOADABLE float __gen_ocl_internal_fabs(float x) { return __gen_ocl_fabs(x); }\n"
+"INLINE OVERLOADABLE float __gen_ocl_internal_trunc(float x) { return __gen_ocl_rndz(x); }\n"
+"INLINE OVERLOADABLE float __gen_ocl_internal_round(float x) { return __gen_ocl_rnde(x); }\n"
+"INLINE OVERLOADABLE float __gen_ocl_internal_floor(float x) { return __gen_ocl_rndd(x); }\n"
+"INLINE OVERLOADABLE float __gen_ocl_internal_ceil(float x) { return __gen_ocl_rndu(x); }\n"
+"INLINE OVERLOADABLE float __gen_ocl_internal_log(float x) { return native_log(x); }\n"
+"INLINE OVERLOADABLE float __gen_ocl_internal_log2(float x) { return native_log2(x); }\n"
+"INLINE OVERLOADABLE float __gen_ocl_internal_log10(float x) { return native_log10(x); }\n"
+"INLINE OVERLOADABLE float __gen_ocl_internal_exp(float x) { return native_exp(x); }\n"
+"INLINE OVERLOADABLE float powr(float x, float y) { return __gen_ocl_pow(x,y); }\n"
+"INLINE OVERLOADABLE float fmod(float x, float y) { return x-y*__gen_ocl_rndz(x/y); }\n"
+"\n"
+"// TODO use llvm intrinsics definitions\n"
+"#define cos native_cos\n"
+"#define sin native_sin\n"
+"#define pow powr\n"
+"\n"
+"INLINE OVERLOADABLE float mad(float a, float b, float c) {\n"
+" return a*b+c;\n"
+"}\n"
+"\n"
+"INLINE OVERLOADABLE uint select(uint src0, uint src1, uint cond) {\n"
+" return cond ? src1 : src0;\n"
+"}\n"
+"INLINE OVERLOADABLE int select(int src0, int src1, int cond) {\n"
+" return cond ? src1 : src0;\n"
+"}\n"
+"INLINE OVERLOADABLE float select(float src0, float src1, int cond) {\n"
+" return cond ? src1 : src0;\n"
+"}\n"
+"\n"
+"// This will be optimized out by LLVM and will output LLVM select instructions\n"
+"#define DECL_SELECT4(TYPE4, TYPE, COND_TYPE4, MASK) \\\n"
+"INLINE OVERLOADABLE TYPE4 select(TYPE4 src0, TYPE4 src1, COND_TYPE4 cond) { \\\n"
+" TYPE4 dst; \\\n"
+" const TYPE x0 = src0.x; /* Fix performance issue with CLANG */ \\\n"
+" const TYPE x1 = src1.x; \\\n"
+" const TYPE y0 = src0.y; \\\n"
+" const TYPE y1 = src1.y; \\\n"
+" const TYPE z0 = src0.z; \\\n"
+" const TYPE z1 = src1.z; \\\n"
+" const TYPE w0 = src0.w; \\\n"
+" const TYPE w1 = src1.w; \\\n"
+" dst.x = (cond.x & MASK) ? x1 : x0; \\\n"
+" dst.y = (cond.y & MASK) ? y1 : y0; \\\n"
+" dst.z = (cond.z & MASK) ? z1 : z0; \\\n"
+" dst.w = (cond.w & MASK) ? w1 : w0; \\\n"
+" return dst; \\\n"
+"}\n"
+"DECL_SELECT4(int4, int, int4, 0x80000000)\n"
+"DECL_SELECT4(float4, float, int4, 0x80000000)\n"
+"#undef DECL_SELECT4\n"
+"\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"// Common Functions (see 6.11.4 of OCL 1.1 spec)\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"#define DECL_MIN_MAX(TYPE) \\\n"
+"INLINE OVERLOADABLE TYPE max(TYPE a, TYPE b) { \\\n"
+" return a > b ? a : b; \\\n"
+"} \\\n"
+"INLINE OVERLOADABLE TYPE min(TYPE a, TYPE b) { \\\n"
+" return a < b ? a : b; \\\n"
+"}\n"
+"DECL_MIN_MAX(float)\n"
+"DECL_MIN_MAX(int)\n"
+"DECL_MIN_MAX(short)\n"
+"DECL_MIN_MAX(char)\n"
+"DECL_MIN_MAX(uint)\n"
+"DECL_MIN_MAX(unsigned short)\n"
+"DECL_MIN_MAX(unsigned char)\n"
+"#undef DECL_MIN_MAX\n"
+"\n"
+"INLINE OVERLOADABLE float __gen_ocl_internal_fmax(float a, float b) { return max(a,b); }\n"
+"INLINE OVERLOADABLE float __gen_ocl_internal_fmin(float a, float b) { return min(a,b); }\n"
+"INLINE OVERLOADABLE float mix(float x, float y, float a) { return x + (y-x)*a;}\n"
+"\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"// Geometric functions (see 6.11.5 of OCL 1.1 spec)\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"INLINE OVERLOADABLE float dot(float2 p0, float2 p1) {\n"
+" return mad(p0.x,p1.x,p0.y*p1.y);\n"
+"}\n"
+"INLINE OVERLOADABLE float dot(float3 p0, float3 p1) {\n"
+" return mad(p0.x,p1.x,mad(p0.z,p1.z,p0.y*p1.y));\n"
+"}\n"
+"INLINE OVERLOADABLE float dot(float4 p0, float4 p1) {\n"
+" return mad(p0.x,p1.x,mad(p0.w,p1.w,mad(p0.z,p1.z,p0.y*p1.y)));\n"
+"}\n"
+"\n"
+"INLINE OVERLOADABLE float dot(float8 p0, float8 p1) {\n"
+" return mad(p0.x,p1.x,mad(p0.s7,p1.s7, mad(p0.s6,p1.s6,mad(p0.s5,p1.s5,\n"
+" mad(p0.s4,p1.s4,mad(p0.w,p1.w, mad(p0.z,p1.z,p0.y*p1.y)))))));\n"
+"}\n"
+"INLINE OVERLOADABLE float dot(float16 p0, float16 p1) {\n"
+" return mad(p0.sc,p1.sc,mad(p0.sd,p1.sd,mad(p0.se,p1.se,mad(p0.sf,p1.sf,\n"
+" mad(p0.s8,p1.s8,mad(p0.s9,p1.s9,mad(p0.sa,p1.sa,mad(p0.sb,p1.sb,\n"
+" mad(p0.x,p1.x,mad(p0.s7,p1.s7, mad(p0.s6,p1.s6,mad(p0.s5,p1.s5,\n"
+" mad(p0.s4,p1.s4,mad(p0.w,p1.w, mad(p0.z,p1.z,p0.y*p1.y)))))))))))))));\n"
+"}\n"
+"\n"
+"INLINE OVERLOADABLE float length(float x) { return __gen_ocl_fabs(x); }\n"
+"INLINE OVERLOADABLE float length(float2 x) { return sqrt(dot(x,x)); }\n"
+"INLINE OVERLOADABLE float length(float3 x) { return sqrt(dot(x,x)); }\n"
+"INLINE OVERLOADABLE float length(float4 x) { return sqrt(dot(x,x)); }\n"
+"INLINE OVERLOADABLE float length(float8 x) { return sqrt(dot(x,x)); }\n"
+"INLINE OVERLOADABLE float length(float16 x) { return sqrt(dot(x,x)); }\n"
+"INLINE OVERLOADABLE float distance(float x, float y) { return length(x-y); }\n"
+"INLINE OVERLOADABLE float distance(float2 x, float2 y) { return length(x-y); }\n"
+"INLINE OVERLOADABLE float distance(float3 x, float3 y) { return length(x-y); }\n"
+"INLINE OVERLOADABLE float distance(float4 x, float4 y) { return length(x-y); }\n"
+"INLINE OVERLOADABLE float distance(float8 x, float8 y) { return length(x-y); }\n"
+"INLINE OVERLOADABLE float distance(float16 x, float16 y) { return length(x-y); }\n"
+"INLINE OVERLOADABLE float normalize(float x) { return 1.f; }\n"
+"INLINE OVERLOADABLE float2 normalize(float2 x) { return x * rsqrt(dot(x, x)); }\n"
+"INLINE OVERLOADABLE float3 normalize(float3 x) { return x * rsqrt(dot(x, x)); }\n"
+"INLINE OVERLOADABLE float4 normalize(float4 x) { return x * rsqrt(dot(x, x)); }\n"
+"INLINE OVERLOADABLE float8 normalize(float8 x) { return x * rsqrt(dot(x, x)); }\n"
+"INLINE OVERLOADABLE float16 normalize(float16 x) { return x * rsqrt(dot(x, x)); }\n"
+"\n"
+"INLINE OVERLOADABLE float fast_length(float x) { return __gen_ocl_fabs(x); }\n"
+"INLINE OVERLOADABLE float fast_length(float2 x) { return sqrt(dot(x,x)); }\n"
+"INLINE OVERLOADABLE float fast_length(float3 x) { return sqrt(dot(x,x)); }\n"
+"INLINE OVERLOADABLE float fast_length(float4 x) { return sqrt(dot(x,x)); }\n"
+"INLINE OVERLOADABLE float fast_length(float8 x) { return sqrt(dot(x,x)); }\n"
+"INLINE OVERLOADABLE float fast_length(float16 x) { return sqrt(dot(x,x)); }\n"
+"INLINE OVERLOADABLE float fast_distance(float x, float y) { return length(x-y); }\n"
+"INLINE OVERLOADABLE float fast_distance(float2 x, float2 y) { return length(x-y); }\n"
+"INLINE OVERLOADABLE float fast_distance(float3 x, float3 y) { return length(x-y); }\n"
+"INLINE OVERLOADABLE float fast_distance(float4 x, float4 y) { return length(x-y); }\n"
+"INLINE OVERLOADABLE float fast_distance(float8 x, float8 y) { return length(x-y); }\n"
+"INLINE OVERLOADABLE float fast_distance(float16 x, float16 y) { return length(x-y); }\n"
+"INLINE OVERLOADABLE float fast_normalize(float x) { return 1.f; }\n"
+"INLINE OVERLOADABLE float2 fast_normalize(float2 x) { return x * rsqrt(dot(x, x)); }\n"
+"INLINE OVERLOADABLE float3 fast_normalize(float3 x) { return x * rsqrt(dot(x, x)); }\n"
+"INLINE OVERLOADABLE float4 fast_normalize(float4 x) { return x * rsqrt(dot(x, x)); }\n"
+"INLINE OVERLOADABLE float8 fast_normalize(float8 x) { return x * rsqrt(dot(x, x)); }\n"
+"INLINE OVERLOADABLE float16 fast_normalize(float16 x) { return x * rsqrt(dot(x, x)); }\n"
+"\n"
+"INLINE OVERLOADABLE float3 cross(float3 v0, float3 v1) {\n"
+" return v0.yzx*v1.zxy-v0.zxy*v1.yzx;\n"
+"}\n"
+"INLINE OVERLOADABLE float4 cross(float4 v0, float4 v1) {\n"
+" return (float4)(v0.yzx*v1.zxy-v0.zxy*v1.yzx, 0.f);\n"
+"}\n"
+"\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"// Vector loads and stores\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"\n"
+"// These loads and stores will use untyped reads and writes, so we can just\n"
+"// cast to vector loads / stores. Not C99 compliant BTW due to aliasing issue.\n"
+"// Well we do not care, we do not activate TBAA in the compiler\n"
+"#define DECL_UNTYPED_RW_SPACE_N(TYPE, DIM, SPACE) \\\n"
+"INLINE OVERLOADABLE TYPE##DIM vload##DIM(size_t offset, const SPACE TYPE *p) { \\\n"
+" return *(SPACE TYPE##DIM *) (p + DIM * offset); \\\n"
+"} \\\n"
+"INLINE OVERLOADABLE void vstore##DIM(TYPE##DIM v, size_t offset, SPACE TYPE *p) { \\\n"
+" *(SPACE TYPE##DIM *) (p + DIM * offset) = v; \\\n"
+"}\n"
+"\n"
+"#define DECL_UNTYPED_RW_ALL_SPACE(TYPE, SPACE) \\\n"
+" DECL_UNTYPED_RW_SPACE_N(TYPE, 2, SPACE) \\\n"
+" DECL_UNTYPED_RW_SPACE_N(TYPE, 3, SPACE) \\\n"
+" DECL_UNTYPED_RW_SPACE_N(TYPE, 4, SPACE) \\\n"
+" DECL_UNTYPED_RW_SPACE_N(TYPE, 8, SPACE) \\\n"
+" DECL_UNTYPED_RW_SPACE_N(TYPE, 16, SPACE)\n"
+"\n"
+"#define DECL_UNTYPED_RW_ALL(TYPE) \\\n"
+" DECL_UNTYPED_RW_ALL_SPACE(TYPE, __global) \\\n"
+" DECL_UNTYPED_RW_ALL_SPACE(TYPE, __local) \\\n"
+" DECL_UNTYPED_RW_ALL_SPACE(TYPE, __constant) \\\n"
+" DECL_UNTYPED_RW_ALL_SPACE(TYPE, __private)\n"
+"\n"
+"DECL_UNTYPED_RW_ALL(float)\n"
+"DECL_UNTYPED_RW_ALL(uint)\n"
+"DECL_UNTYPED_RW_ALL(int)\n"
+"\n"
+"#undef DECL_UNTYPED_RW_ALL\n"
+"#undef DECL_UNTYPED_RW_ALL_SPACE\n"
+"#undef DECL_UNTYPED_RW_SPACE_N\n"
+"\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"// Declare functions for vector types which are derived from scalar ones\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"#define DECL_VECTOR_1OP(NAME, TYPE) \\\n"
+" INLINE OVERLOADABLE TYPE##2 NAME(TYPE##2 v) { \\\n"
+" return (TYPE##2)(NAME(v.x), NAME(v.y)); \\\n"
+" }\\\n"
+" INLINE OVERLOADABLE TYPE##3 NAME(TYPE##3 v) { \\\n"
+" return (TYPE##3)(NAME(v.x), NAME(v.y), NAME(v.z)); \\\n"
+" }\\\n"
+" INLINE OVERLOADABLE TYPE##4 NAME(TYPE##4 v) { \\\n"
+" return (TYPE##4)(NAME(v.x), NAME(v.y), NAME(v.z), NAME(v.w)); \\\n"
+" }\\\n"
+" INLINE OVERLOADABLE TYPE##8 NAME(TYPE##8 v) { \\\n"
+" TYPE##8 dst;\\\n"
+" dst.s0123 = NAME(v.s0123);\\\n"
+" dst.s4567 = NAME(v.s4567);\\\n"
+" return dst;\\\n"
+" }\\\n"
+" INLINE OVERLOADABLE TYPE##16 NAME(TYPE##16 v) { \\\n"
+" TYPE##16 dst;\\\n"
+" dst.s01234567 = NAME(v.s01234567);\\\n"
+" dst.s89abcdef = NAME(v.s89abcdef);\\\n"
+" return dst;\\\n"
+" }\n"
+"DECL_VECTOR_1OP(native_cos, float);\n"
+"DECL_VECTOR_1OP(native_sin, float);\n"
+"DECL_VECTOR_1OP(native_tan, float);\n"
+"DECL_VECTOR_1OP(native_sqrt, float);\n"
+"DECL_VECTOR_1OP(native_rsqrt, float);\n"
+"DECL_VECTOR_1OP(native_log2, float);\n"
+"DECL_VECTOR_1OP(native_recip, float);\n"
+"DECL_VECTOR_1OP(__gen_ocl_internal_fabs, float);\n"
+"DECL_VECTOR_1OP(__gen_ocl_internal_trunc, float);\n"
+"DECL_VECTOR_1OP(__gen_ocl_internal_round, float);\n"
+"DECL_VECTOR_1OP(__gen_ocl_internal_floor, float);\n"
+"DECL_VECTOR_1OP(__gen_ocl_internal_ceil, float);\n"
+"DECL_VECTOR_1OP(__gen_ocl_internal_log, float);\n"
+"DECL_VECTOR_1OP(__gen_ocl_internal_log2, float);\n"
+"DECL_VECTOR_1OP(__gen_ocl_internal_log10, float);\n"
+"#undef DECL_VECTOR_1OP\n"
+"\n"
+"#define DECL_VECTOR_2OP(NAME, TYPE) \\\n"
+" INLINE OVERLOADABLE TYPE##2 NAME(TYPE##2 v0, TYPE##2 v1) { \\\n"
+" return (TYPE##2)(NAME(v0.x, v1.x), NAME(v1.y, v1.y)); \\\n"
+" }\\\n"
+" INLINE OVERLOADABLE TYPE##3 NAME(TYPE##3 v0, TYPE##3 v1) { \\\n"
+" return (TYPE##3)(NAME(v0.x, v1.x), NAME(v0.y, v1.y), NAME(v0.z, v1.z)); \\\n"
+" }\\\n"
+" INLINE OVERLOADABLE TYPE##4 NAME(TYPE##4 v0, TYPE##4 v1) { \\\n"
+" return (TYPE##4)(NAME(v0.x, v1.x), NAME(v0.y, v1.y), NAME(v0.z, v1.z), NAME(v0.w, v1.w)); \\\n"
+" }\\\n"
+" INLINE OVERLOADABLE TYPE##8 NAME(TYPE##8 v0, TYPE##8 v1) { \\\n"
+" TYPE##8 dst;\\\n"
+" dst.s0123 = NAME(v0.s0123, v1.s0123);\\\n"
+" dst.s4567 = NAME(v0.s4567, v1.s4567);\\\n"
+" return dst;\\\n"
+" }\\\n"
+" INLINE OVERLOADABLE TYPE##16 NAME(TYPE##16 v0, TYPE##16 v1) { \\\n"
+" TYPE##16 dst;\\\n"
+" dst.s01234567 = NAME(v0.s01234567, v1.s01234567);\\\n"
+" dst.s89abcdef = NAME(v0.s89abcdef, v1.s89abcdef);\\\n"
+" return dst;\\\n"
+" }\n"
+"DECL_VECTOR_2OP(min, float);\n"
+"DECL_VECTOR_2OP(max, float);\n"
+"DECL_VECTOR_2OP(fmod, float);\n"
+"DECL_VECTOR_2OP(powr, float);\n"
+"#undef DECL_VECTOR_2OP\n"
+"\n"
+"#define DECL_VECTOR_3OP(NAME, TYPE) \\\n"
+" INLINE OVERLOADABLE TYPE##2 NAME(TYPE##2 v0, TYPE##2 v1, TYPE##2 v2) { \\\n"
+" return (TYPE##2)(NAME(v0.x, v1.x, v2.x), NAME(v1.y, v1.y, v2.y)); \\\n"
+" }\\\n"
+" INLINE OVERLOADABLE TYPE##3 NAME(TYPE##3 v0, TYPE##3 v1, TYPE##3 v2) { \\\n"
+" return (TYPE##3)(NAME(v0.x, v1.x, v2.x), NAME(v0.y, v1.y, v2.y), NAME(v0.z, v1.z, v2.z)); \\\n"
+" }\\\n"
+" INLINE OVERLOADABLE TYPE##4 NAME(TYPE##4 v0, TYPE##4 v1, TYPE##4 v2) { \\\n"
+" return (TYPE##4)(NAME(v0.x, v1.x, v2.x), NAME(v0.y, v1.y, v2.y), NAME(v0.z, v1.z, v2.z), NAME(v0.w, v1.w, v2.w)); \\\n"
+" }\\\n"
+" INLINE OVERLOADABLE TYPE##8 NAME(TYPE##8 v0, TYPE##8 v1, TYPE##8 v2) { \\\n"
+" TYPE##8 dst;\\\n"
+" dst.s0123 = NAME(v0.s0123, v1.s0123, v2.s0123);\\\n"
+" dst.s4567 = NAME(v0.s4567, v1.s4567, v2.s4567);\\\n"
+" return dst;\\\n"
+" }\\\n"
+" INLINE OVERLOADABLE TYPE##16 NAME(TYPE##16 v0, TYPE##16 v1, TYPE##16 v2) { \\\n"
+" TYPE##16 dst;\\\n"
+" dst.s01234567 = NAME(v0.s01234567, v1.s01234567, v2.s01234567);\\\n"
+" dst.s89abcdef = NAME(v0.s89abcdef, v1.s89abcdef, v2.s89abcdef);\\\n"
+" return dst;\\\n"
+" }\n"
+"DECL_VECTOR_3OP(mad, float);\n"
+"DECL_VECTOR_3OP(mix, float);\n"
+"#undef DECL_VECTOR_3OP\n"
+"\n"
+"// mix requires more variants\n"
+"INLINE OVERLOADABLE float2 mix(float2 x, float2 y, float a) { return mix(x,y,(float2)(a));}\n"
+"INLINE OVERLOADABLE float3 mix(float3 x, float3 y, float a) { return mix(x,y,(float3)(a));}\n"
+"INLINE OVERLOADABLE float4 mix(float4 x, float4 y, float a) { return mix(x,y,(float4)(a));}\n"
+"INLINE OVERLOADABLE float8 mix(float8 x, float8 y, float a) { return mix(x,y,(float8)(a));}\n"
+"INLINE OVERLOADABLE float16 mix(float16 x, float16 y, float a) { return mix(x,y,(float16)(a));}\n"
+"\n"
+"// XXX workaround ptx profile\n"
+"#define fabs __gen_ocl_internal_fabs\n"
+"#define trunc __gen_ocl_internal_trunc\n"
+"#define round __gen_ocl_internal_round\n"
+"#define floor __gen_ocl_internal_floor\n"
+"#define ceil __gen_ocl_internal_ceil,\n"
+"#define log __gen_ocl_internal_log\n"
+"#define log2 __gen_ocl_internal_log2\n"
+"#define log10 __gen_ocl_internal_log10\n"
+"#define exp __gen_ocl_internal_exp\n"
+"#define fmin __gen_ocl_internal_fmin\n"
+"#define fmax __gen_ocl_internal_fmax\n"
+"\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"// Synchronization functions\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"#define CLK_LOCAL_MEM_FENCE (1 << 0)\n"
+"#define CLK_GLOBAL_MEM_FENCE (1 << 1)\n"
+"\n"
+"void __gen_ocl_barrier_local(void);\n"
+"void __gen_ocl_barrier_global(void);\n"
+"void __gen_ocl_barrier_local_and_global(void);\n"
+"\n"
+"typedef uint cl_mem_fence_flags;\n"
+"INLINE void barrier(cl_mem_fence_flags flags) {\n"
+" if (flags == (CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE))\n"
+" __gen_ocl_barrier_local_and_global();\n"
+" else if (flags == CLK_LOCAL_MEM_FENCE)\n"
+" __gen_ocl_barrier_local();\n"
+" else if (flags == CLK_GLOBAL_MEM_FENCE)\n"
+" __gen_ocl_barrier_global();\n"
+"}\n"
+"\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"// Force the compilation to SIMD8 or SIMD16\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"\n"
+"int __gen_ocl_force_simd8(void);\n"
+"int __gen_ocl_force_simd16(void);\n"
+"\n"
+"#define NULL ((void*)0)\n"
+"#undef PURE\n"
+"#undef CONST\n"
+"#undef OVERLOADABLE\n"
+"#undef INLINE\n"
+"#endif /* __GEN_OCL_STDLIB_H__ */\n"
+"\n"
+;
+}
+
diff --git a/backend/src/sys/Makefile b/backend/src/sys/Makefile
new file mode 100644
index 0000000..71f8a63
--- /dev/null
+++ b/backend/src/sys/Makefile
@@ -0,0 +1,3 @@
+TOP=../..
+SUBDIRS=.
+include ../../Makefile.shared
diff --git a/backend/src/sys/alloc.cpp b/backend/src/sys/alloc.cpp
new file mode 100644
index 0000000..cc2186f
--- /dev/null
+++ b/backend/src/sys/alloc.cpp
@@ -0,0 +1,359 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file alloc.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ *
+ * Provides facilities to track allocations and pre-initialize memory at
+ * memory allocation and memory free time
+ */
+#include "sys/alloc.hpp"
+#include "sys/atomic.hpp"
+#include "sys/mutex.hpp"
+
+#if GBE_DEBUG_MEMORY
+#include <tr1/unordered_map>
+#include <cstring>
+#endif /* GBE_DEBUG_MEMORY */
+
+#if defined(__ICC__)
+#include <stdint.h>
+#endif /* __ICC__ */
+#include <map>
+#include <vector>
+#include <iomanip>
+
+////////////////////////////////////////////////////////////////////////////////
+/// Memory debugger
+////////////////////////////////////////////////////////////////////////////////
+
+#if GBE_DEBUG_MEMORY
+namespace gbe
+{
+ /*! Store the data of each allocation */
+ struct AllocData {
+ INLINE AllocData(void) {}
+ INLINE AllocData(int fileName_, int functionName_, int line_, intptr_t alloc_) :
+ fileName(fileName_), functionName(functionName_), line(line_), alloc(alloc_) {}
+ int fileName, functionName, line;
+ intptr_t alloc;
+ };
+
+ /*! Store allocation information */
+ struct MemDebugger {
+ MemDebugger(void) : unfreedNum(0), allocNum(0) {}
+ ~MemDebugger(void) { this->dumpAlloc(); }
+ void* insertAlloc(void *ptr, const char *file, const char *function, int line);
+ void removeAlloc(void *ptr);
+ void dumpAlloc(void);
+ void dumpData(const AllocData &data);
+ /*! Count the still unfreed allocations */
+ volatile intptr_t unfreedNum;
+ /*! Total number of allocations done */
+ volatile intptr_t allocNum;
+ /*! Map the file name and function name strings to indices */
+ std::tr1::unordered_map<const char*, int> staticStringMap;
+ /*! Each element contains the actual string */
+ std::vector<const char*> staticStringVector;
+ std::map<uintptr_t, AllocData> allocMap;
+ /*! Protect the memory debugger accesses */
+ MutexSys mutex;
+ };
+
+ void* MemDebugger::insertAlloc(void *ptr, const char *file, const char *function, int line)
+ {
+ if (ptr == NULL) return ptr;
+ Lock<MutexSys> lock(mutex);
+ const uintptr_t iptr = (uintptr_t) ptr;
+ if (UNLIKELY(allocMap.find(iptr) != allocMap.end())) {
+ this->dumpData(allocMap.find(iptr)->second);
+ FATAL("Pointer already in map");
+ }
+ const auto fileIt = staticStringMap.find(file);
+ const auto functionIt = staticStringMap.find(function);
+ int fileName, functionName;
+ if (fileIt == staticStringMap.end()) {
+ staticStringVector.push_back(file);
+ staticStringMap[file] = fileName = int(staticStringVector.size()) - 1;
+ } else
+ fileName = staticStringMap[file];
+ if (functionIt == staticStringMap.end()) {
+ staticStringVector.push_back(function);
+ staticStringMap[function] = functionName = int(staticStringVector.size()) - 1;
+ } else
+ functionName = staticStringMap[function];
+ allocMap[iptr] = AllocData(fileName, functionName, line, allocNum);
+ unfreedNum++;
+ allocNum++;
+ return ptr;
+ }
+
+ void MemDebugger::removeAlloc(void *ptr)
+ {
+ if (ptr == NULL) return;
+ Lock<MutexSys> lock(mutex);
+ const uintptr_t iptr = (uintptr_t) ptr;
+ FATAL_IF(allocMap.find(iptr) == allocMap.end(), "Pointer not referenced");
+ allocMap.erase(iptr);
+ unfreedNum--;
+ }
+
+ void MemDebugger::dumpData(const AllocData &data) {
+ std::cerr << "ALLOC " << data.alloc << ": " <<
+ "file " << staticStringVector[data.fileName] << ", " <<
+ "function " << staticStringVector[data.functionName] << ", " <<
+ "line " << data.line << std::endl;
+ }
+
+ void MemDebugger::dumpAlloc(void) {
+ std::cerr << "MemDebugger: Unfreed number: " << unfreedNum << std::endl;
+ for (const auto &alloc : allocMap) this->dumpData(alloc.second);
+ std::cerr << "MemDebugger: " << staticStringVector.size()
+ << " allocated static strings" << std::endl;
+ }
+
+ /*! The user can deactivate the memory initialization */
+ static bool memoryInitializationEnabled = true;
+
+ /*! Declare C like interface functions here */
+ static MemDebugger *memDebugger = NULL;
+
+ /*! Monitor maximum memory requirement in the compiler */
+ static MutexSys *sizeMutex = NULL;
+ static bool isMutexInitializing = false;
+ static size_t memDebuggerCurrSize(0u);
+ static size_t memDebuggerMaxSize(0u);
+ static void SizeMutexDeallocate(void) { if (sizeMutex) delete sizeMutex; }
+ static void SizeMutexAllocate(void) {
+ if (sizeMutex == NULL && isMutexInitializing == false) {
+ isMutexInitializing = true;
+ sizeMutex = new MutexSys;
+ atexit(SizeMutexDeallocate);
+ }
+ }
+
+ /*! Stop the memory debugger */
+ static void MemDebuggerEnd(void) {
+ MemDebugger *_debug = memDebugger;
+ memDebugger = NULL;
+ std::cout << "Maximum memory consumption: "
+ << std::setprecision(2) << std::fixed
+ << float(memDebuggerMaxSize) / 1024. << "KB" << std::endl;
+ delete _debug;
+ GBE_ASSERT(memDebuggerCurrSize == 0);
+ }
+
+ /*! Bring up the debugger at pre-main */
+ static struct ForceMemDebugger {
+ ForceMemDebugger(void) {
+ doesnotmatter = GBE_NEW(int);
+ GBE_DELETE(doesnotmatter);
+ }
+ int *doesnotmatter;
+ } forceMemDebugger;
+
+ /*! Start the memory debugger */
+ static void MemDebuggerStart(void) {
+ if (memDebugger == NULL) {
+ atexit(MemDebuggerEnd);
+ memDebugger = new MemDebugger;
+ }
+ }
+
+ void* MemDebuggerInsertAlloc(void *ptr, const char *file, const char *function, int line) {
+ if (memDebugger == NULL) MemDebuggerStart();
+ return memDebugger->insertAlloc(ptr, file, function, line);
+ }
+ void MemDebuggerRemoveAlloc(void *ptr) {
+ if (memDebugger == NULL) MemDebuggerStart();
+ memDebugger->removeAlloc(ptr);
+ }
+ void MemDebuggerDumpAlloc(void) {
+ if (memDebugger == NULL) MemDebuggerStart();
+ memDebugger->dumpAlloc();
+ }
+ void MemDebuggerEnableMemoryInitialization(bool enabled) {
+ memoryInitializationEnabled = enabled;
+ }
+ void MemDebuggerInitializeMem(void *mem, size_t sz) {
+ if (memoryInitializationEnabled) std::memset(mem, 0xcd, sz);
+ }
+} /* namespace gbe */
+
+#endif /* GBE_DEBUG_MEMORY */
+
+namespace gbe
+{
+#if GBE_DEBUG_MEMORY
+ void* memAlloc(size_t size) {
+ void *ptr = std::malloc(size + sizeof(size_t));
+ *(size_t *) ptr = size;
+ MemDebuggerInitializeMem((char*) ptr + sizeof(size_t), size);
+ SizeMutexAllocate();
+ if (sizeMutex) sizeMutex->lock();
+ memDebuggerCurrSize += size;
+ memDebuggerMaxSize = std::max(memDebuggerCurrSize, memDebuggerMaxSize);
+ if (sizeMutex) sizeMutex->unlock();
+ return (char *) ptr + sizeof(size_t);
+ }
+ void memFree(void *ptr) {
+ if (ptr != NULL) {
+ char *toFree = (char*) ptr - sizeof(size_t);
+ const size_t size = *(size_t *) toFree;
+ MemDebuggerInitializeMem(ptr, size);
+ SizeMutexAllocate();
+ if (sizeMutex) sizeMutex->lock();
+ memDebuggerCurrSize -= size;
+ if (sizeMutex) sizeMutex->unlock();
+ std::free(toFree);
+ }
+ }
+#else
+ void* memAlloc(size_t size) { return std::malloc(size); }
+ void memFree(void *ptr) { if (ptr != NULL) std::free(ptr); }
+#endif /* GBE_DEBUG_MEMORY */
+
+} /* namespace gbe */
+
+#if GBE_DEBUG_MEMORY
+
+namespace gbe
+{
+ void* alignedMalloc(size_t size, size_t align) {
+ void* mem = malloc(size+align+sizeof(uintptr_t) + sizeof(void*));
+ FATAL_IF (!mem && size, "memory allocation failed");
+ char* aligned = (char*) mem + sizeof(uintptr_t) + sizeof(void*);
+ aligned += align - ((uintptr_t)aligned & (align - 1));
+ ((void**)aligned)[-1] = mem;
+ ((uintptr_t*)aligned)[-2] = uintptr_t(size);
+ MemDebuggerInitializeMem(aligned, size);
+ SizeMutexAllocate();
+ if (sizeMutex) sizeMutex->lock();
+ memDebuggerCurrSize += size;
+ memDebuggerMaxSize = std::max(memDebuggerCurrSize, memDebuggerMaxSize);
+ if (sizeMutex) sizeMutex->unlock();
+ return aligned;
+ }
+
+ void alignedFree(void* ptr) {
+ if (ptr) {
+ const size_t size = ((uintptr_t*)ptr)[-2];
+ MemDebuggerInitializeMem(ptr, size);
+ free(((void**)ptr)[-1]);
+ SizeMutexAllocate();
+ if (sizeMutex) sizeMutex->lock();
+ memDebuggerCurrSize -= size;
+ if (sizeMutex) sizeMutex->unlock();
+ }
+ }
+} /* namespace gbe */
+
+#else /* GBE_DEBUG_MEMORY */
+
+////////////////////////////////////////////////////////////////////////////////
+/// Linux Platform
+////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__LINUX__)
+
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <malloc.h>
+#include <iostream>
+
+namespace gbe
+{
+ void* alignedMalloc(size_t size, size_t align) {
+ void* ptr = memalign(align,size);
+ FATAL_IF (!ptr && size, "memory allocation failed");
+ MemDebuggerInitializeMem(ptr, size);
+ return ptr;
+ }
+
+ void alignedFree(void *ptr) { if (ptr) std::free(ptr); }
+} /* namespace gbe */
+
+#else
+#error "Unsupported platform"
+#endif /* __LINUX__ */
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// Linear allocator
+////////////////////////////////////////////////////////////////////////////////
+
+namespace gbe
+{
+ LinearAllocator::Segment::Segment(size_t size) :
+ size(size), offset(0u), data(alignedMalloc(size, CACHE_LINE)), next(NULL){}
+
+ LinearAllocator::Segment::~Segment(void) {
+ alignedFree(data);
+ if (this->next) GBE_DELETE(this->next);
+ }
+
+ LinearAllocator::LinearAllocator(size_t minSize, size_t maxSize) :
+ maxSize(std::max(maxSize, size_t(CACHE_LINE)))
+ {
+ this->curr = GBE_NEW(LinearAllocator::Segment, std::max(minSize, size_t(1)));
+ }
+
+ LinearAllocator::~LinearAllocator(void) {
+ if (this->curr) GBE_DELETE(this->curr);
+ }
+
+ void *LinearAllocator::allocate(size_t size)
+ {
+#if GBE_DEBUG_SPECIAL_ALLOCATOR
+ return GBE_ALIGNED_MALLOC(size, sizeof(void*));
+#else
+ // Try to use the current segment. This is the most likely condition here
+ this->curr->offset = ALIGN(this->curr->offset, sizeof(void*));
+ if (this->curr->offset + size <= this->curr->size) {
+ char *ptr = (char*) curr->data + this->curr->offset;
+ this->curr->offset += size;
+ return (void*) ptr;
+ }
+
+ // Well not really a use case in this code base
+ if (UNLIKELY(size > maxSize)) {
+ // This is really bad since we do two allocations
+ Segment *unfortunate = GBE_NEW(Segment, size);
+ GBE_ASSERT(this->curr);
+ Segment *next = this->curr->next;
+ this->curr->next = unfortunate;
+ unfortunate->next = next;
+ return unfortunate->data;
+ }
+
+ // OK. We need a new segment
+ const size_t segmentSize = std::max(size, 2*this->curr->size);
+ Segment *next = GBE_NEW(Segment, segmentSize);
+ next->next = curr;
+ this->curr = next;
+ char *ptr = (char*) curr->data;
+ this->curr->offset += size;
+ return ptr;
+#endif
+ }
+
+} /* namespace gbe */
+
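As a quick illustration of the linear allocator above: most requests are served by bumping an offset in the current segment, a full segment is replaced by one twice as large, and a request bigger than maxSize gets a segment of its own chained behind the current one. A minimal usage sketch, assuming the CACHE_LINE/KB constants and GBE_* helpers that sys/platform.hpp provides:

    #include "sys/alloc.hpp"

    static void linear_sketch(void) {
      gbe::LinearAllocator linear;               // starts with one CACHE_LINE sized segment
      void *small = linear.allocate(32);         // bump allocation from the current segment
      void *large = linear.allocate(128 * 1024); // above the default 64KB maxSize: own segment
      linear.deallocate(small);                  // no-op unless GBE_DEBUG_SPECIAL_ALLOCATOR is set
      (void) large;                              // every segment is freed by the destructor
    }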
diff --git a/backend/src/sys/alloc.hpp b/backend/src/sys/alloc.hpp
new file mode 100644
index 0000000..a6305a0
--- /dev/null
+++ b/backend/src/sys/alloc.hpp
@@ -0,0 +1,341 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file alloc.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#ifndef __GBE_ALLOC_HPP__
+#define __GBE_ALLOC_HPP__
+
+#include "sys/platform.hpp"
+#include "sys/assert.hpp"
+#include <algorithm>
+
+namespace gbe
+{
+ /*! regular allocation */
+ void* memAlloc(size_t size);
+ void memFree(void *ptr);
+
+ /*! Aligned allocation */
+ void* alignedMalloc(size_t size, size_t align = 64);
+ void alignedFree(void* ptr);
+
+ /*! Monitor memory allocations */
+#if GBE_DEBUG_MEMORY
+ void* MemDebuggerInsertAlloc(void*, const char*, const char*, int);
+ void MemDebuggerRemoveAlloc(void *ptr);
+ void MemDebuggerDumpAlloc(void);
+ void MemDebuggerInitializeMem(void *mem, size_t sz);
+ void MemDebuggerEnableMemoryInitialization(bool enabled);
+#else
+ INLINE void* MemDebuggerInsertAlloc(void *ptr, const char*, const char*, int) {return ptr;}
+ INLINE void MemDebuggerRemoveAlloc(void *ptr) {}
+ INLINE void MemDebuggerDumpAlloc(void) {}
+ INLINE void MemDebuggerInitializeMem(void *mem, size_t sz) {}
+ INLINE void MemDebuggerEnableMemoryInitialization(bool enabled) {}
+#endif /* GBE_DEBUG_MEMORY */
+
+ /*! Properly handle the allocated type */
+ template <typename T>
+ T* _MemDebuggerInsertAlloc(T *ptr, const char *file, const char *function, int line) {
+ MemDebuggerInsertAlloc(ptr, file, function, line);
+ return ptr;
+ }
+} /* namespace gbe */
+
+/*! Declare a class with custom allocators */
+#define GBE_CLASS(TYPE) \
+ GBE_STRUCT(TYPE) \
+private:
+
+/*! Declare a structure with custom allocators */
+#define GBE_STRUCT(TYPE) \
+public: \
+ void* operator new(size_t size) { \
+ return gbe::alignedMalloc(size, GBE_DEFAULT_ALIGNMENT); \
+ } \
+ void* operator new[](size_t size) { \
+ return gbe::alignedMalloc(size, GBE_DEFAULT_ALIGNMENT); \
+ } \
+ void* operator new(size_t size, void *p) { return p; } \
+ void* operator new[](size_t size, void *p) { return p; } \
+ void operator delete(void* ptr) { return gbe::alignedFree(ptr); } \
+ void operator delete[](void* ptr) { return gbe::alignedFree(ptr); }
+
+/*! Macros to handle allocation position */
+#define GBE_NEW(T,...) \
+ gbe::_MemDebuggerInsertAlloc(new T(__VA_ARGS__), __FILE__, __FUNCTION__, __LINE__)
+
+#define GBE_NEW_NO_ARG(T) \
+ gbe::_MemDebuggerInsertAlloc(new T, __FILE__, __FUNCTION__, __LINE__)
+
+#define GBE_NEW_ARRAY(T,N,...) \
+ gbe::_MemDebuggerInsertAlloc(new T[N](__VA_ARGS__), __FILE__, __FUNCTION__, __LINE__)
+
+#define GBE_NEW_ARRAY_NO_ARG(T,N)\
+ gbe::_MemDebuggerInsertAlloc(new T[N], __FILE__, __FUNCTION__, __LINE__)
+
+#define GBE_NEW_P(T,X,...) \
+ gbe::_MemDebuggerInsertAlloc(new (X) T(__VA_ARGS__), __FILE__, __FUNCTION__, __LINE__)
+
+#define GBE_DELETE(X) \
+ do { gbe::MemDebuggerRemoveAlloc(X); delete X; } while (0)
+
+#define GBE_DELETE_ARRAY(X) \
+ do { gbe::MemDebuggerRemoveAlloc(X); delete[] X; } while (0)
+
+#define GBE_MALLOC(SZ) \
+ gbe::MemDebuggerInsertAlloc(gbe::memAlloc(SZ),__FILE__, __FUNCTION__, __LINE__)
+
+#define GBE_FREE(X) \
+ do { gbe::MemDebuggerRemoveAlloc(X); gbe::memFree(X); } while (0)
+
+#define GBE_ALIGNED_FREE(X) \
+ do { gbe::MemDebuggerRemoveAlloc(X); gbe::alignedFree(X); } while (0)
+
+#define GBE_ALIGNED_MALLOC(SZ,ALIGN) \
+ gbe::MemDebuggerInsertAlloc(gbe::alignedMalloc(SZ,ALIGN),__FILE__, __FUNCTION__, __LINE__)
+
+namespace gbe
+{
+ /*! STL compliant allocator to intercept all memory allocations */
+ template<typename T>
+ class Allocator {
+ public:
+ typedef T value_type;
+ typedef value_type* pointer;
+ typedef const value_type* const_pointer;
+ typedef value_type& reference;
+ typedef const value_type& const_reference;
+ typedef std::size_t size_type;
+ typedef std::ptrdiff_t difference_type;
+ typedef typename std::allocator<void>::const_pointer void_allocator_ptr;
+ template<typename U>
+ struct rebind { typedef Allocator<U> other; };
+
+ INLINE Allocator(void) {}
+ INLINE ~Allocator(void) {}
+ INLINE Allocator(Allocator const&) {}
+ template<typename U>
+ INLINE Allocator(Allocator<U> const&) {}
+ INLINE pointer address(reference r) { return &r; }
+ INLINE const_pointer address(const_reference r) { return &r; }
+ INLINE pointer allocate(size_type n, void_allocator_ptr = 0) {
+ if (AlignOf<T>::value > sizeof(uintptr_t))
+ return (pointer) GBE_ALIGNED_MALLOC(n*sizeof(T), AlignOf<T>::value);
+ else
+ return (pointer) GBE_MALLOC(n * sizeof(T));
+ }
+ INLINE void deallocate(pointer p, size_type) {
+ if (AlignOf<T>::value > sizeof(uintptr_t))
+ GBE_ALIGNED_FREE(p);
+ else
+ GBE_FREE(p);
+ }
+ INLINE size_type max_size(void) const {
+ return std::numeric_limits<size_type>::max() / sizeof(T);
+ }
+ INLINE void construct(pointer p, const T& t = T()) { ::new(p) T(t); }
+ INLINE void destroy(pointer p) { p->~T(); }
+ INLINE bool operator==(Allocator const&) { return true; }
+ INLINE bool operator!=(Allocator const& a) { return !operator==(a); }
+ };
+
+// Deactivate fast allocators
+#ifndef GBE_DEBUG_SPECIAL_ALLOCATOR
+#define GBE_DEBUG_SPECIAL_ALLOCATOR 0
+#endif
+
+ /*! A growing pool never gives memory back to the system but chains free
+ * elements together so that deallocation can be done quickly
+ */
+ template <typename T>
+ class GrowingPool
+ {
+ public:
+ GrowingPool(uint32_t elemNum = 1) :
+ curr(GBE_NEW(GrowingPoolElem, elemNum <= 1 ? 1 : elemNum)),
+ free(NULL), full(NULL), freeList(NULL) {}
+ ~GrowingPool(void) {
+ GBE_SAFE_DELETE(curr);
+ GBE_SAFE_DELETE(free);
+ GBE_SAFE_DELETE(full);
+ }
+ void *allocate(void) {
+#if GBE_DEBUG_SPECIAL_ALLOCATOR
+ return GBE_ALIGNED_MALLOC(sizeof(T), AlignOf<T>::value);
+#else
+ // Pick up an element from the free list
+ if (this->freeList != NULL) {
+ void *data = (void*) freeList;
+ this->freeList = *(void**) freeList;
+ return data;
+ }
+
+ // Pick up an element from the current block (if not full)
+ if (this->curr->allocated < this->curr->maxElemNum) {
+ void *data = (T*) curr->data + curr->allocated++;
+ return data;
+ }
+
+ // Block is full
+ this->curr->next = this->full;
+ this->full = this->curr;
+
+ // Try to pick up a free block; with no free block we must allocate a new one
+ if (this->free)
+ this->getFreeBlock();
+ else
+ this->curr = GBE_NEW(GrowingPoolElem, 2 * this->curr->maxElemNum);
+
+
+ void *data = (T*) curr->data + curr->allocated++;
+ return data;
+#endif /* GBE_DEBUG_SPECIAL_ALLOCATOR */
+ }
+ void deallocate(void *t) {
+ if (t == NULL) return;
+#if GBE_DEBUG_SPECIAL_ALLOCATOR
+ GBE_ALIGNED_FREE(t);
+#else
+ *(void**) t = this->freeList;
+ this->freeList = t;
+#endif /* GBE_DEBUG_SPECIAL_ALLOCATOR */
+ }
+ void rewind(void) {
+#if GBE_DEBUG_SPECIAL_ALLOCATOR == 0
+ // All free elements return to their blocks
+ this->freeList = NULL;
+
+ // Put back current block in full list
+ if (this->curr) {
+ this->curr->next = this->full;
+ this->full = this->curr;
+ this->curr = NULL;
+ }
+
+ // Reverse the chain list and mark all blocks as empty
+ while (this->full) {
+ GrowingPoolElem *next = this->full->next;
+ this->full->allocated = 0;
+ this->full->next = this->free;
+ this->free = this->full;
+ this->full = next;
+ }
+
+ // Provide a valid current block
+ this->getFreeBlock();
+#endif /* GBE_DEBUG_SPECIAL_ALLOCATOR */
+ }
+ private:
+ /*! Pick-up a free block */
+ INLINE void getFreeBlock(void) {
+ GBE_ASSERT(this->free);
+ this->curr = this->free;
+ this->free = this->free->next;
+ this->curr->next = NULL;
+ }
+ /*! Chunk of elements to allocate */
+ class GrowingPoolElem
+ {
+ friend class GrowingPool;
+ GrowingPoolElem(size_t elemNum) {
+ const size_t sz = std::max(sizeof(T), sizeof(void*));
+ this->data = (T*) GBE_ALIGNED_MALLOC(elemNum * sz, AlignOf<T>::value);
+ this->next = NULL;
+ this->maxElemNum = elemNum;
+ this->allocated = 0;
+ }
+ ~GrowingPoolElem(void) {
+ GBE_ALIGNED_FREE(this->data);
+ if (this->next) GBE_DELETE(this->next);
+ }
+ T *data;
+ GrowingPoolElem *next;
+ size_t allocated, maxElemNum;
+ };
+ GrowingPoolElem *curr; //!< To get new element from
+ GrowingPoolElem *free; //!< Blocks that can be reused (after rewind)
+ GrowingPoolElem *full; //!< Blocks fully used
+ void *freeList; //!< Elements that have been deallocated
+ GBE_CLASS(GrowingPool);
+ };
+
+/*! Helper macros to build and destroy objects with a growing pool */
+#define DECL_POOL(TYPE, POOL) \
+ GrowingPool<TYPE> POOL; \
+ template <typename... Args> \
+ TYPE *new##TYPE(Args&&... args) { \
+ return new (POOL.allocate()) TYPE(args...); \
+ } \
+ void delete##TYPE(TYPE *ptr) { \
+ ptr->~TYPE(); \
+ POOL.deallocate(ptr); \
+ }
+
+ /*! A linear allocator just grows and does not reuse freed memory. It can
+ * however allocate objects of any size
+ */
+ class LinearAllocator
+ {
+ public:
+ /*! Initiate the linear allocator (one segment is allocated) */
+ LinearAllocator(size_t minSize = CACHE_LINE, size_t maxSize = 64*KB);
+ /*! Free up everything */
+ ~LinearAllocator(void);
+ /*! Allocate size bytes */
+ void *allocate(size_t size);
+ /*! Nothing here */
+ INLINE void deallocate(void *ptr) {
+#if GBE_DEBUG_SPECIAL_ALLOCATOR
+ if (ptr) GBE_ALIGNED_FREE(ptr);
+#endif /* GBE_DEBUG_SPECIAL_ALLOCATOR */
+ }
+ private:
+ /*! Holds an allocated segment of memory */
+ struct Segment {
+ /*! Allocate a new segment */
+ Segment(size_t size);
+ /*! Destroy the segment and the next ones */
+ ~Segment(void);
+ /*! Size of the segment */
+ size_t size;
+ /*! Offset to the next free bytes (if any left) */
+ size_t offset;
+ /*! Pointer to valid data */
+ void *data;
+ /*! Pointer to the next segment */
+ Segment *next;
+ /*! Use internal allocator */
+ GBE_STRUCT(Segment);
+ };
+ /*! Points to the current segment we can allocate from */
+ Segment *curr;
+ /*! Maximum segment size */
+ size_t maxSize;
+ /*! Use internal allocator */
+ GBE_CLASS(LinearAllocator);
+ };
+
+} /* namespace gbe */
+
+#endif /* __GBE_ALLOC_HPP__ */
+
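A hedged sketch of how these allocation helpers are meant to be used together; Foo, FooFactory and the pool name are illustrative, not types from the tree:

    #include "sys/alloc.hpp"

    namespace gbe {
      struct Foo {
        Foo(int x, int y) : x(x), y(y) {}
        int x, y;
        GBE_STRUCT(Foo);               // operator new/delete now go through alignedMalloc/alignedFree
      };

      struct FooFactory {
        DECL_POOL(Foo, fooPool);       // declares newFoo(...) / deleteFoo(...) on top of GrowingPool<Foo>
        GBE_CLASS(FooFactory);
      };

      inline void pool_sketch(void) {
        Foo *a = GBE_NEW(Foo, 1, 2);   // tracked by the memory debugger in GBE_DEBUG_MEMORY builds
        GBE_DELETE(a);
        FooFactory factory;
        Foo *b = factory.newFoo(3, 4); // placement-new into the pool
        factory.deleteFoo(b);          // destructor call, then the slot goes on the pool free list
      }
    }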
diff --git a/backend/src/sys/assert.cpp b/backend/src/sys/assert.cpp
new file mode 100644
index 0000000..52178a1
--- /dev/null
+++ b/backend/src/sys/assert.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file assert.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#if GBE_COMPILE_UTESTS
+
+#include "sys/assert.hpp"
+#include "sys/exception.hpp"
+#include "sys/cvar.hpp"
+#include <cassert>
+#include <cstdlib>
+
+namespace gbe
+{
+ BVAR(OCL_BREAK_POINT_IN_ASSERTION, false);
+ BVAR(OCL_ABORT_IN_ASSERTION, false);
+
+ void onFailedAssertion(const char *msg, const char *file, const char *fn, int line)
+ {
+ char lineString[256];
+ sprintf(lineString, "%i", line);
+ assert(msg != NULL && file != NULL && fn != NULL);
+ const std::string str = "Compiler error: "
+ + std::string(msg) + "\n at file "
+ + std::string(file)
+ + ", function " + std::string(fn)
+ + ", line " + std::string(lineString);
+ if (OCL_BREAK_POINT_IN_ASSERTION)
+ DEBUGBREAK();
+ if (OCL_ABORT_IN_ASSERTION) {
+ assert(false);
+ exit(-1);
+ }
+ throw Exception(str);
+ }
+} /* namespace gbe */
+
+#else
+
+#include "sys/assert.hpp"
+#include "sys/exception.hpp"
+#include "sys/platform.hpp"
+#include <cstdio>
+#include <cstdlib>
+#include <unistd.h>
+
+namespace gbe
+{
+ void onFailedAssertion(const char *msg, const char *file, const char *fn, int32_t line)
+ {
+ assert(msg != NULL && file != NULL && fn != NULL);
+ fprintf(stderr, "ASSERTION FAILED: %s\n"
+ " at file %s, function %s, line %i\n",
+ msg, file, fn, line);
+ fflush(stdout);
+ DEBUGBREAK();
+ _exit(-1);
+ }
+} /* namespace gbe */
+
+#endif /* GBE_COMPILE_UTESTS */
+
diff --git a/backend/src/sys/assert.hpp b/backend/src/sys/assert.hpp
new file mode 100644
index 0000000..553e391
--- /dev/null
+++ b/backend/src/sys/assert.hpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file assert.hpp
+ *
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#ifndef __GBE_ASSERT_HPP__
+#define __GBE_ASSERT_HPP__
+
+namespace gbe
+{
+ /*! Called when an assertion fails. An optional message is supported */
+ void onFailedAssertion(const char *msg, const char *file, const char *fn, int line);
+} /* namespace gbe */
+
+#endif /* __GBE_ASSERT_HPP__ */
+
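The GBE_ASSERT family that actually calls onFailedAssertion lives in sys/platform.hpp, which is outside this hunk; the wrapper below is only an illustrative sketch in that spirit, not the project's real macro:

    #include "sys/assert.hpp"

    // Illustrative only -- the real assertion macros are defined in sys/platform.hpp.
    #define MY_ASSERT(COND) \
      do { \
        if (!(COND)) \
          gbe::onFailedAssertion(#COND, __FILE__, __FUNCTION__, __LINE__); \
      } while (0)

    static void assert_sketch(int n) {
      MY_ASSERT(n >= 0);   // throws gbe::Exception in utest builds, prints and exits otherwise
    }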
diff --git a/backend/src/sys/atomic.hpp b/backend/src/sys/atomic.hpp
new file mode 100644
index 0000000..3684ae9
--- /dev/null
+++ b/backend/src/sys/atomic.hpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef __GBE_ATOMIC_HPP__
+#define __GBE_ATOMIC_HPP__
+
+#include "sys/intrinsics.hpp"
+
+namespace gbe
+{
+ template <typename T>
+ struct AtomicInternal {
+ protected:
+ AtomicInternal(const AtomicInternal&); // don't implement
+ AtomicInternal& operator= (const AtomicInternal&); // don't implement
+
+ public:
+ INLINE AtomicInternal(void) {}
+ INLINE AtomicInternal(T data) : data(data) {}
+ INLINE AtomicInternal& operator =(const T input) { data = input; return *this; }
+ INLINE operator T() const { return data; }
+ INLINE void storeRelease(T x) { __store_release(&data, x); }
+ public:
+ INLINE friend T operator+= (AtomicInternal& value, T input) { return atomic_add(&value.data, input) + input; }
+ INLINE friend T operator++ (AtomicInternal& value) { return atomic_add(&value.data, 1) + 1; }
+ INLINE friend T operator-- (AtomicInternal& value) { return atomic_add(&value.data, -1) - 1; }
+ INLINE friend T operator++ (AtomicInternal& value, int) { return atomic_add(&value.data, 1); }
+ INLINE friend T operator-- (AtomicInternal& value, int) { return atomic_add(&value.data, -1); }
+ INLINE friend T cmpxchg (AtomicInternal& value, const T v, const T c) { return atomic_cmpxchg(&value.data,v,c); }
+
+ private:
+ volatile T data;
+ GBE_STRUCT(AtomicInternal);
+ };
+
+ typedef AtomicInternal<atomic32_t> Atomic32;
+ typedef AtomicInternal<atomic_t> Atomic;
+}
+
+#endif /* __GBE_ATOMIC_HPP__ */
+
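A small sketch of the intended use of these wrappers (the counter is illustrative):

    #include "sys/atomic.hpp"

    static gbe::Atomic32 counter(0);

    static void atomic_sketch(void) {
      const int32_t now = ++counter;       // atomic_add(&data, 1) + 1: returns the new value
      if (cmpxchg(counter, 0, 16) == 16) {
        // the counter held 16 and this thread is the one that reset it to 0
      }
      (void) now;
    }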
diff --git a/backend/src/sys/cvar.cpp b/backend/src/sys/cvar.cpp
new file mode 100644
index 0000000..1ee2c98
--- /dev/null
+++ b/backend/src/sys/cvar.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file cvar.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "sys/cvar.hpp"
+#include <cstdio>
+
+namespace gbe
+{
+
+ CVarInit::CVarInit(const char *name, int32_t *addr, int32_t imin, int32_t i, int32_t imax) :
+ varType(CVarInit::INTEGER)
+ {
+ this->i.min = imin;
+ this->i.max = imax;
+ const char *env = getenv(name);
+ if (env != NULL) {
+ sscanf(env, "%i", &i);
+ i = std::min(imax, std::max(imin, i));
+ }
+ *addr = i;
+ }
+
+ CVarInit::CVarInit(const char *name, float *addr, float fmin, float f, float fmax) :
+ varType(CVarInit::FLOAT)
+ {
+ this->f.min = fmin;
+ this->f.max = fmax;
+ const char *env = getenv(name);
+ if (env != NULL) {
+ sscanf(env, "%f", &f);
+ f = std::min(fmax, std::max(fmin, f));
+ }
+ *addr = f;
+ }
+
+ CVarInit::CVarInit(const char *name, std::string *str, const std::string &v) :
+ varType(CVarInit::STRING)
+ {
+ const char *env = getenv(name);
+ *str = env != NULL ? env : v;
+ }
+
+} /* namespace gbe */
+
diff --git a/backend/src/sys/cvar.hpp b/backend/src/sys/cvar.hpp
new file mode 100644
index 0000000..7350a3e
--- /dev/null
+++ b/backend/src/sys/cvar.hpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file cvar.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ *
+ * Quake-like console variable system. Set the matching environment variables
+ * from the shell to change their values
+ */
+
+#ifndef __GBE_CVAR_HPP__
+#define __GBE_CVAR_HPP__
+
+#include "sys/platform.hpp"
+
+namespace gbe
+{
+ /*! A CVar is either a float, an integer or a string value. CVarInit is only
+ * here to set the global variable in pre-main
+ */
+ class CVarInit
+ {
+ public:
+ enum {
+ STRING = 0,
+ INTEGER = 1,
+ FLOAT = 2
+ };
+ /*! Build a CVar from an integer environment variable */
+ explicit CVarInit(const char *name, int32_t *addr, int32_t imin, int32_t i, int32_t imax);
+ /*! Build a CVar from a float environment variable */
+ explicit CVarInit(const char *name, float *addr, float fmin, float f, float fmax);
+ /*! Build a CVar from a string environment variable */
+ explicit CVarInit(const char *name, std::string *str, const std::string &v);
+ int varType; //!< STRING, INTEGER or FLOAT
+ std::string *str; //!< string variable
+ union {
+ struct { int32_t min, *curr, max; } i; //!< integer variables with bounds
+ struct { float min, *curr, max; } f; //!< float variables with bounds
+ };
+ };
+} /* namespace gbe */
+
+/*! Declare an integer console variable */
+#define IVAR(NAME, MIN, CURR, MAX) \
+ int32_t NAME; \
+ static gbe::CVarInit __CVAR##NAME##__LINE__##__(#NAME, &NAME, int32_t(MIN), int32_t(CURR), int32_t(MAX));
+
+/*! Declare a float console variable */
+#define FVAR(NAME, MIN, CURR, MAX) \
+ float NAME; \
+ static gbe::CVarInit __CVAR##NAME##__LINE__##__(#NAME, &NAME, float(MIN), float(CURR), float(MAX));
+
+/*! Declare a string console variable */
+#define SVAR(NAME, STR) \
+ std::string NAME; \
+ static gbe::CVarInit __CVAR##NAME##__LINE__##__(#NAME, &NAME, STR);
+
+/*! Declare a Boolean variable (just an integer in {0,1}) */
+#define BVAR(NAME, CURR) IVAR(NAME, 0, CURR ? 1 : 0, 1)
+
+#endif /* __GBE_CVAR_HPP__ */
+
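These macros are meant to be dropped at file scope; the initial value is read from the environment at start-up and clamped to the declared range. A sketch with made-up variable names (the OCL_DUMMY_* names are not variables the project defines):

    #include "sys/cvar.hpp"

    IVAR(OCL_DUMMY_LEVEL, 0, 1, 3);   // integer in [0, 3], defaults to 1
    BVAR(OCL_DUMMY_TRACE, false);     // boolean flag, defaults to 0
    SVAR(OCL_DUMMY_PATH, "/tmp");     // string, defaults to "/tmp"

    // From the shell: OCL_DUMMY_LEVEL=2 OCL_DUMMY_TRACE=1 ./some_binary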
diff --git a/backend/src/sys/exception.hpp b/backend/src/sys/exception.hpp
new file mode 100644
index 0000000..d74ca0d
--- /dev/null
+++ b/backend/src/sys/exception.hpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file exception.hpp
+ *
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __GBE_EXCEPTION_HPP__
+#define __GBE_EXCEPTION_HPP__
+
+#if GBE_COMPILE_UTESTS
+
+#include <exception>
+#include <string>
+
+namespace gbe
+{
+ /*! Exceptions are only used when running the unit tests */
+ class Exception : public std::exception
+ {
+ public:
+ Exception(const std::string &msg) throw() : msg(msg) {}
+ Exception(const Exception &other) throw() : msg(other.msg) {}
+ ~Exception(void) throw() {}
+ Exception &operator= (const Exception &other) throw() {
+ this->msg = other.msg;
+ return *this;
+ }
+ const char *what(void) const throw() { return msg.c_str(); }
+ private:
+ std::string msg; //!< String message
+ };
+
+} /* namespace gbe */
+
+#endif /* GBE_COMPILE_UTESTS */
+#endif /* __GBE_EXCEPTION_HPP__ */
+
diff --git a/backend/src/sys/fixed_array.hpp b/backend/src/sys/fixed_array.hpp
new file mode 100644
index 0000000..d84c350
--- /dev/null
+++ b/backend/src/sys/fixed_array.hpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file fixed_array.hpp
+ *
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __GBE_FIXED_ARRAY_HPP__
+#define __GBE_FIXED_ARRAY_HPP__
+
+#include "platform.hpp"
+#include <cstring>
+
+namespace gbe
+{
+ /*! Regular C array but with bound checks */
+ template<typename T, size_t N>
+ class fixed_array
+ {
+ public:
+ /*! Do not initialize the data */
+ fixed_array(void) {}
+ /*! Copy the input array */
+ fixed_array(const T array[N]) { std::memcpy(elem, array, N * sizeof(T)); }
+ /*! First element (non const) */
+ T* begin(void) { return &elem[0]; }
+ /*! First non-valid element (non const) */
+ T* end(void) { return begin() + N; }
+ /*! First element (const) */
+ const T* begin(void) const { return &elem[0]; }
+ /*! First non-valid element (const) */
+ const T* end(void) const { return begin() + N; }
+ /*! Number of elements in the array */
+ size_t size(void) const { return N; }
+ /*! Get the pointer to the data (non-const) */
+ T* data(void) { return &elem[0]; }
+ /*! Get the pointer to the data (const) */
+ const T* data(void) const { return &elem[0]; }
+ /*! First element (const) */
+ const T& front(void) const { return *begin(); }
+ /*! Last element (const) */
+ const T& back(void) const { return *(end() - 1); }
+ /*! First element (non-const) */
+ T& front(void) { return *begin(); }
+ /*! Last element (non-const) */
+ T& back(void) { return *(end() - 1); }
+ /*! Get element at position index (with bound check) */
+ INLINE T& operator[] (size_t index) {
+ GBE_ASSERT(index < size());
+ return elem[index];
+ }
+ /*! Get element at position index (with bound check) */
+ INLINE const T& operator[] (size_t index) const {
+ GBE_ASSERT(index < size());
+ return elem[index];
+ }
+ private:
+ T elem[N]; //!< Store the elements
+ STATIC_ASSERT(N > 0); //!< zero element is not allowed
+ GBE_CLASS(fixed_array);
+ };
+
+} /* namespace gbe */
+
+#endif /* __GBE_FIXED_ARRAY_HPP__ */
+
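A minimal usage sketch of the container above:

    #include "sys/fixed_array.hpp"

    static float sum_weights(void) {
      const float weights[4] = {1.f, 2.f, 3.f, 4.f};
      gbe::fixed_array<float, 4> arr(weights);   // copies the C array
      float sum = 0.f;
      for (size_t i = 0; i < arr.size(); ++i)
        sum += arr[i];                           // GBE_ASSERT fires on an out-of-range index
      return sum;
    }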
diff --git a/backend/src/sys/hash_map.hpp b/backend/src/sys/hash_map.hpp
new file mode 100644
index 0000000..fb1d1ef
--- /dev/null
+++ b/backend/src/sys/hash_map.hpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file hash_map.hpp
+ *
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __GBE_HASH_MAP_HPP__
+#define __GBE_HASH_MAP_HPP__
+
+#include "sys/platform.hpp"
+
+#ifdef __MSVC__
+#include <unordered_map>
+#else
+#include <tr1/unordered_map>
+#endif /* __MSVC__ */
+
+namespace gbe
+{
+ /*! Add specific allocator to the hash map */
+ template <class Key,
+ class T,
+ class Hash = std::hash<Key>,
+ class Pred = std::equal_to<Key>>
+ class hash_map : public std::tr1::unordered_map<Key,T,Hash,Pred,Allocator<std::pair<const Key,T>>>,
+ public NonCopyable
+ {
+ public:
+ // Typedefs
+ typedef std::pair<const Key, T> value_type;
+ typedef Allocator<value_type> allocator_type;
+ typedef std::tr1::unordered_map<Key,T,Hash,Pred,allocator_type> parent_type;
+ typedef typename allocator_type::size_type size_type;
+ typedef Key key_type;
+ typedef T mapped_type;
+ typedef Hash hasher;
+ typedef Pred key_equal;
+
+ /*! Default constructor */
+ INLINE explicit hash_map(size_type n = 3,
+ const hasher& hf = hasher(),
+ const key_equal& eql = key_equal(),
+ const allocator_type& a = allocator_type()) :
+ parent_type(n, hf, eql, a) {}
+ /*! Iteration constructor */
+ template <class InputIterator>
+ INLINE hash_map(InputIterator first,
+ InputIterator last,
+ size_type n = 3,
+ const hasher& hf = hasher(),
+ const key_equal& eql = key_equal(),
+ const allocator_type& a = allocator_type()) :
+ parent_type(first,last,n,hf,eql,a) {}
+#if 0
+ /*! Copy constructor */
+ INLINE hash_map(const hash_map &other) : parent_type(other) {}
+#endif
+ GBE_CLASS(hash_map);
+ };
+} /* namespace gbe */
+
+#endif /* __GBE_HASH_MAP_HPP__ */
+
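Usage is the same as the underlying unordered_map; only the allocator changes. A short sketch:

    #include "sys/hash_map.hpp"

    static void hash_map_sketch(void) {
      gbe::hash_map<uint32_t, const char*> names;
      names[0] = "kernel";                      // node allocated through gbe::Allocator
      if (names.find(0) != names.end())
        (void) names[0];
    }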
diff --git a/backend/src/sys/intrinsics.hpp b/backend/src/sys/intrinsics.hpp
new file mode 100644
index 0000000..2e25dc7
--- /dev/null
+++ b/backend/src/sys/intrinsics.hpp
@@ -0,0 +1,209 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef __GBE_INTRINSICS_HPP__
+#define __GBE_INTRINSICS_HPP__
+
+#include "sys/platform.hpp"
+#include <xmmintrin.h>
+#include <emmintrin.h>
+
+#if defined(__MSVC__)
+
+#include <intrin.h>
+
+#define GBE_COMPILER_WRITE_BARRIER _WriteBarrier()
+#define GBE_COMPILER_READ_WRITE_BARRIER _ReadWriteBarrier()
+
+#if _MSC_VER >= 1400
+#pragma intrinsic(_ReadBarrier)
+#define GBE_COMPILER_READ_BARRIER _ReadBarrier()
+#else
+#define GBE_COMPILER_READ_BARRIER _ReadWriteBarrier()
+#endif /* _MSC_VER >= 1400 */
+
+INLINE int __bsf(int v) {
+ unsigned long r = 0; _BitScanForward(&r,v); return r;
+}
+
+INLINE int __bsr(int v) {
+ unsigned long r = 0; _BitScanReverse(&r,v); return r;
+}
+
+INLINE int __btc(int v, int i) {
+ long r = v; _bittestandcomplement(&r,i); return r;
+}
+
+INLINE int __bts(int v, int i) {
+ long r = v; _bittestandset(&r,i); return r;
+}
+
+INLINE int __btr(int v, int i) {
+ long r = v; _bittestandreset(&r,i); return r;
+}
+
+INLINE void memoryFence(void) { _mm_mfence(); }
+
+#if defined(__X86_64__) && !defined(__INTEL_COMPILER)
+
+INLINE size_t __bsf(size_t v) {
+ unsigned long r = 0; _BitScanForward64(&r,v); return r;
+}
+
+INLINE size_t __bsr(size_t v) {
+ unsigned long r = 0; _BitScanReverse64(&r,v); return r;
+}
+
+INLINE size_t __btc(size_t v, size_t i) {
+ __int64_t r = v; _bittestandcomplement64(&r,i); return r;
+}
+
+INLINE size_t __bts(size_t v, size_t i) {
+ __int64_t r = v; _bittestandset64(&r,i); return r;
+}
+
+INLINE size_t __btr(size_t v, size_t i) {
+ __int64_t r = v; _bittestandreset64(&r,i); return r;
+}
+
+#endif /* defined(__X86_64__) && !defined(__INTEL_COMPILER) */
+
+typedef int32_t atomic32_t;
+
+INLINE int32_t atomic_add(volatile int32_t* m, const int32_t v) {
+ return _InterlockedExchangeAdd((volatile long*)m,v);
+}
+
+INLINE int32_t atomic_cmpxchg(volatile int32_t* m, const int32_t v, const int32_t c) {
+ return _InterlockedCompareExchange((volatile long*)m,v,c);
+}
+
+#if defined(__X86_64__)
+
+typedef int64_t atomic_t;
+
+INLINE int64_t atomic_add(volatile int64_t* m, const int64_t v) {
+ return _InterlockedExchangeAdd64(m,v);
+}
+
+INLINE int64_t atomic_cmpxchg(volatile int64_t* m, const int64_t v, const int64_t c) {
+ return _InterlockedCompareExchange64(m,v,c);
+}
+
+#else
+
+typedef int32_t atomic_t;
+
+#endif /* defined(__X86_64__) */
+
+#else
+
+INLINE unsigned int __popcnt(unsigned int in) {
+ int r = 0; asm ("popcnt %1,%0" : "=r"(r) : "r"(in)); return r;
+}
+
+INLINE int __bsf(int v) {
+ int r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r;
+}
+
+INLINE int __bsr(int v) {
+ int r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r;
+}
+
+INLINE int __btc(int v, int i) {
+ int r = 0; asm ("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
+}
+
+INLINE int __bts(int v, int i) {
+ int r = 0; asm ("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
+}
+
+INLINE int __btr(int v, int i) {
+ int r = 0; asm ("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
+}
+
+INLINE size_t __bsf(size_t v) {
+ size_t r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r;
+}
+
+INLINE size_t __bsr(size_t v) {
+ size_t r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r;
+}
+
+INLINE size_t __btc(size_t v, size_t i) {
+ size_t r = 0; asm ("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
+}
+
+INLINE size_t __bts(size_t v, size_t i) {
+ size_t r = 0; asm ("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
+}
+
+INLINE size_t __btr(size_t v, size_t i) {
+ size_t r = 0; asm ("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
+}
+
+INLINE void memoryFence(void) { _mm_mfence(); }
+
+typedef int32_t atomic32_t;
+
+INLINE int32_t atomic_add(int32_t volatile* value, int32_t input)
+{ asm volatile("lock xadd %0,%1" : "+r" (input), "+m" (*value) : "r" (input), "m" (*value)); return input; }
+
+INLINE int32_t atomic_cmpxchg(int32_t volatile* value, const int32_t input, int32_t comparand)
+{ asm volatile("lock cmpxchg %2,%0" : "=m" (*value), "=a" (comparand) : "r" (input), "m" (*value), "a" (comparand) : "flags"); return comparand; }
+
+#if defined(__X86_64__)
+
+ typedef int64_t atomic_t;
+
+ INLINE int64_t atomic_add(int64_t volatile* value, int64_t input)
+ { asm volatile("lock xaddq %0,%1" : "+r" (input), "+m" (*value) : "r" (input), "m" (*value)); return input; }
+
+ INLINE int64_t atomic_cmpxchg(int64_t volatile* value, const int64_t input, int64_t comparand)
+ { asm volatile("lock cmpxchgq %2,%0" : "+m" (*value), "+a" (comparand) : "r" (input), "m" (*value), "r" (comparand) : "flags"); return comparand; }
+
+#else
+
+ typedef int32_t atomic_t;
+
+#endif /* defined(__X86_64__) */
+
+#define GBE_COMPILER_READ_WRITE_BARRIER asm volatile("" ::: "memory");
+#define GBE_COMPILER_WRITE_BARRIER GBE_COMPILER_READ_WRITE_BARRIER
+#define GBE_COMPILER_READ_BARRIER GBE_COMPILER_READ_WRITE_BARRIER
+
+#endif /* __MSVC__ */
+
+template <typename T>
+INLINE T __load_acquire(volatile T *ptr)
+{
+ GBE_COMPILER_READ_WRITE_BARRIER;
+ T x = *ptr; // for x86, load == load_acquire
+ GBE_COMPILER_READ_WRITE_BARRIER;
+ return x;
+}
+
+template <typename T>
+INLINE void __store_release(volatile T *ptr, T x)
+{
+ GBE_COMPILER_READ_WRITE_BARRIER;
+ *ptr = x; // for x86, store == store_release
+ GBE_COMPILER_READ_WRITE_BARRIER;
+}
+#endif /* __GBE_INTRINSICS_HPP__ */
+
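On x86 plain loads and stores already have acquire/release semantics, so these helpers only need the compiler barriers around the access. A tiny producer/consumer sketch (payload and flag are illustrative):

    #include "sys/intrinsics.hpp"

    static int payload = 0;
    static volatile int32_t flag = 0;

    static void producer(void) {
      payload = 42;                        // ordinary store
      __store_release(&flag, int32_t(1));  // publish: the payload store may not sink below this
    }

    static void consumer(void) {
      if (__load_acquire(&flag))           // acquire: the payload load may not be hoisted above this
        (void) payload;                    // observes 42 once the flag is seen
    }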
diff --git a/backend/src/sys/intrusive_list.cpp b/backend/src/sys/intrusive_list.cpp
new file mode 100644
index 0000000..ed7067c
--- /dev/null
+++ b/backend/src/sys/intrusive_list.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2007 Maciej Sinilo
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "intrusive_list.hpp"
+
+namespace gbe
+{
+ intrusive_list_base::intrusive_list_base() : m_root() {}
+
+ intrusive_list_base::size_type intrusive_list_base::size() const {
+ size_type numNodes(0);
+ const intrusive_list_node* iter = &m_root;
+ do {
+ iter = iter->next;
+ ++numNodes;
+ } while (iter != &m_root);
+ return numNodes - 1;
+ }
+
+ void append(intrusive_list_node *node, intrusive_list_node *prev) {
+ GBE_ASSERT(!node->in_list());
+ node->next = prev->next;
+ node->next->prev = node;
+ prev->next = node;
+ node->prev = prev;
+ }
+
+ void prepend(intrusive_list_node *node, intrusive_list_node *next) {
+ GBE_ASSERT(!node->in_list());
+ node->prev = next->prev;
+ node->prev->next = node;
+ next->prev = node;
+ node->next = next;
+ }
+
+ void link(intrusive_list_node* node, intrusive_list_node* nextNode) {
+ prepend(node, nextNode);
+ }
+
+ void unlink(intrusive_list_node* node) {
+ GBE_ASSERT(node->in_list());
+ node->prev->next = node->next;
+ node->next->prev = node->prev;
+ node->next = node->prev = node;
+ }
+} /* namespace gbe */
+
diff --git a/backend/src/sys/intrusive_list.hpp b/backend/src/sys/intrusive_list.hpp
new file mode 100644
index 0000000..2e2f2a9
--- /dev/null
+++ b/backend/src/sys/intrusive_list.hpp
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2007 Maciej Sinilo
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef __GBE_INTRUSIVE_LIST_HPP__
+#define __GBE_INTRUSIVE_LIST_HPP__
+
+#include "sys/platform.hpp"
+
+namespace gbe
+{
+ /*! List elements must inherit from it */
+ struct intrusive_list_node
+ {
+ INLINE intrusive_list_node(void) { next = prev = this; }
+ INLINE bool in_list(void) const { return this != next; }
+ intrusive_list_node *next;
+ intrusive_list_node *prev;
+ };
+
+ /*! Insert node such that prev -> node */
+ void append(intrusive_list_node *node, intrusive_list_node *prev);
+ /*! Insert node such that node -> next */
+ void prepend(intrusive_list_node *node, intrusive_list_node *next);
+ /*! Same as prepend */
+ void link(intrusive_list_node* node, intrusive_list_node* nextNode);
+ /*! Remove the node from its current list */
+ void unlink(intrusive_list_node* node);
+
+ template<typename Pointer, typename Reference>
+ class intrusive_list_iterator
+ {
+ public:
+ typedef Pointer pointer;
+ typedef Reference reference;
+
+ INLINE intrusive_list_iterator(void): m_node(0) {}
+ INLINE intrusive_list_iterator(Pointer iterNode) : m_node(iterNode) {}
+
+ INLINE Reference operator*(void) const {
+ GBE_ASSERT(m_node);
+ return *m_node;
+ }
+ INLINE Pointer operator->(void) const { return m_node; }
+ INLINE Pointer node(void) const { return m_node; }
+
+ INLINE intrusive_list_iterator& operator++(void) {
+ m_node = static_cast<Pointer>(m_node->next);
+ return *this;
+ }
+ INLINE intrusive_list_iterator& operator--(void) {
+ m_node = static_cast<Pointer>(m_node->prev);
+ return *this;
+ }
+ INLINE intrusive_list_iterator operator++(int) {
+ intrusive_list_iterator copy(*this);
+ ++(*this);
+ return copy;
+ }
+ INLINE intrusive_list_iterator operator--(int) {
+ intrusive_list_iterator copy(*this);
+ --(*this);
+ return copy;
+ }
+
+ INLINE bool operator== (const intrusive_list_iterator& rhs) const {
+ return rhs.m_node == m_node;
+ }
+ INLINE bool operator!= (const intrusive_list_iterator& rhs) const {
+ return !(rhs == *this);
+ }
+ private:
+ Pointer m_node;
+ };
+
+ class intrusive_list_base
+ {
+ public:
+ typedef size_t size_type;
+
+ INLINE void pop_back(void) { unlink(m_root.prev); }
+ INLINE void pop_front(void) { unlink(m_root.next); }
+ INLINE bool empty(void) const { return !m_root.in_list(); }
+ size_type size(void) const;
+
+ protected:
+ intrusive_list_base(void);
+ INLINE ~intrusive_list_base(void) {}
+
+ intrusive_list_node m_root;
+
+ private:
+ intrusive_list_base(const intrusive_list_base&);
+ intrusive_list_base& operator=(const intrusive_list_base&);
+ };
+
+ template<class T>
+ class intrusive_list : public intrusive_list_base
+ {
+ public:
+ typedef T node_type;
+ typedef T value_type;
+ typedef intrusive_list_iterator<T*, T&> iterator;
+ typedef intrusive_list_iterator<const T*, const T&> const_iterator;
+
+ intrusive_list(void) : intrusive_list_base() {
+ intrusive_list_node* testNode((T*)0);
+ static_cast<void>(sizeof(testNode));
+ }
+
+ void push_back(value_type* v) { link(v, &m_root); }
+ void push_front(value_type* v) { link(v, m_root.next); }
+
+ iterator begin(void) { return iterator(upcast(m_root.next)); }
+ iterator end(void) { return iterator(upcast(&m_root)); }
+ iterator rbegin(void) { return iterator(upcast(m_root.prev)); }
+ iterator rend(void) { return iterator(upcast(&m_root)); }
+ const_iterator begin(void) const { return const_iterator(upcast(m_root.next)); }
+ const_iterator end(void) const { return const_iterator(upcast(&m_root)); }
+ const_iterator rbegin(void) const { return const_iterator(upcast(m_root.prev)); }
+ const_iterator rend(void) const { return const_iterator(upcast(&m_root)); }
+
+ INLINE value_type* front(void) { return upcast(m_root.next); }
+ INLINE value_type* back(void) { return upcast(m_root.prev); }
+ INLINE const value_type* front(void) const { return upcast(m_root.next); }
+ INLINE const value_type* back(void) const { return upcast(m_root.prev); }
+
+ iterator insert(iterator pos, value_type* v) {
+ link(v, pos.node());
+ return iterator(v);
+ }
+ iterator erase(iterator it) {
+ iterator itErase(it);
+ ++it;
+ unlink(itErase.node());
+ return it;
+ }
+ iterator erase(iterator first, iterator last) {
+ while (first != last) first = erase(first);
+ return first;
+ }
+
+ void clear(void) { erase(begin(), end()); }
+ void fast_clear(void) { m_root.next = m_root.prev = &m_root; }
+ static void remove(value_type* v) { unlink(v); }
+
+ private:
+ static INLINE node_type* upcast(intrusive_list_node* n) {
+ return static_cast<node_type*>(n);
+ }
+ static INLINE const node_type* upcast(const intrusive_list_node* n) {
+ return static_cast<const node_type*>(n);
+ }
+ };
+} /* namespace gbe */
+
+#endif /* __GBE_INTRUSIVE_LIST_HPP__ */
+
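A node type only has to inherit from intrusive_list_node; the list links existing objects and never allocates. A sketch with an illustrative Task type:

    #include "sys/intrusive_list.hpp"

    struct Task : public gbe::intrusive_list_node {
      Task(int id) : id(id) {}
      int id;
    };

    static void list_sketch(void) {
      Task a(0), b(1);
      gbe::intrusive_list<Task> todo;
      todo.push_back(&a);              // links the node, no allocation
      todo.push_back(&b);
      for (gbe::intrusive_list<Task>::iterator it = todo.begin(); it != todo.end(); ++it)
        (void) it->id;
      todo.clear();                    // unlinks every node, frees nothing
    }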
diff --git a/backend/src/sys/list.hpp b/backend/src/sys/list.hpp
new file mode 100644
index 0000000..51b9c39
--- /dev/null
+++ b/backend/src/sys/list.hpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file list.hpp
+ *
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#ifndef __GBE_LIST_HPP__
+#define __GBE_LIST_HPP__
+
+#include "sys/platform.hpp"
+#include <list>
+
+namespace gbe
+{
+ /*! Use custom allocator instead of std one */
+ template <typename T>
+ class list : public std::list<T, Allocator<T>>
+ {
+ public:
+ // Typedefs
+ typedef T value_type;
+ typedef Allocator<value_type> allocator_type;
+ typedef std::list<T, allocator_type> parent_type;
+ typedef typename allocator_type::size_type size_type;
+
+ /*! Default constructor */
+ INLINE explicit list(const allocator_type &a = allocator_type()) :
+ parent_type(a) {}
+ /*! Repetitive constructor */
+ INLINE explicit list(size_type n,
+ const T &value = T(),
+ const allocator_type &a = allocator_type()) :
+ parent_type(n, value, a) {}
+ /*! Iteration constructor */
+ template <class InputIterator>
+ INLINE list(InputIterator first,
+ InputIterator last,
+ const allocator_type &a = allocator_type()) :
+ parent_type(first, last, a) {}
+ /*! Copy constructor */
+ INLINE list(const list &x) : parent_type(x) {}
+ GBE_CLASS(list);
+ };
+} /* namespace gbe */
+
+#endif /* __GBE_LIST_HPP__ */
+
diff --git a/backend/src/sys/map.hpp b/backend/src/sys/map.hpp
new file mode 100644
index 0000000..1c72400
--- /dev/null
+++ b/backend/src/sys/map.hpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file map.hpp
+ *
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#ifndef __GBE_MAP_HPP__
+#define __GBE_MAP_HPP__
+
+#include "sys/platform.hpp"
+#include <map>
+
+namespace gbe
+{
+ /*! Use custom allocator instead of std one */
+ template<class Key, class T, class Pred = std::less<Key>>
+ class map : public std::map<Key,T,Pred,Allocator<std::pair<const Key, T>>>,
+ public NonCopyable
+ {
+ public:
+ // Typedefs
+ typedef std::pair<const Key, T> value_type;
+ typedef Allocator<value_type> allocator_type;
+ typedef std::map<Key,T,Pred,allocator_type> parent_type;
+ typedef Key key_type;
+ typedef T mapped_type;
+ typedef Pred key_compare;
+ typedef typename allocator_type::pointer pointer;
+ typedef typename allocator_type::const_pointer const_pointer;
+ typedef typename allocator_type::reference reference;
+ typedef typename allocator_type::const_reference const_reference;
+
+ /*! Default constructor */
+ INLINE map(const key_compare &comp = key_compare(),
+ const allocator_type &a = allocator_type()) :
+ parent_type(comp, a) {}
+ /*! Iteration constructor */
+ template<class InputIterator>
+ INLINE map(InputIterator first,
+ InputIterator last,
+ const key_compare &comp = key_compare(),
+ const allocator_type& a = allocator_type()) :
+ parent_type(first, last, comp, a) {}
+#if 0
+ /*! Copy constructor */
+ INLINE map(const map& x) : parent_type(x) {}
+#endif
+ /*! Better than using find if we do not care about the iterator itself */
+ INLINE bool contains(const Key &key) const {
+ return this->find(key) != this->end();
+ }
+ GBE_CLASS(map);
+ };
+} /* namespace gbe */
+
+#endif /* __GBE_MAP_HPP__ */
+
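
A short usage sketch of the wrapper above; it only exercises what the class declares (contains() plus the inherited std::map interface) and assumes the gbe Allocator machinery from sys/alloc.hpp is linked in, as for every container in this directory:

    #include "sys/map.hpp"
    #include <string>

    void map_sketch(void) {
      gbe::map<std::string, int> histogram;  // std::map with the gbe allocator plugged in
      histogram["foo"] = 1;                  // the whole std::map interface is inherited
      if (histogram.contains("foo"))         // reads better than find() != end()
        histogram["foo"]++;
    }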
diff --git a/backend/src/sys/mutex.cpp b/backend/src/sys/mutex.cpp
new file mode 100644
index 0000000..9640150
--- /dev/null
+++ b/backend/src/sys/mutex.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "sys/mutex.hpp"
+
+#if defined(__WIN32__)
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+namespace gbe
+{
+ /*! system mutex using windows API */
+ MutexSys::MutexSys( void ) { mutex = new CRITICAL_SECTION; InitializeCriticalSection((CRITICAL_SECTION*)mutex); }
+ MutexSys::~MutexSys( void ) { DeleteCriticalSection((CRITICAL_SECTION*)mutex); delete ((CRITICAL_SECTION*)mutex); }
+ void MutexSys::lock( void ) { EnterCriticalSection((CRITICAL_SECTION*)mutex); }
+ void MutexSys::unlock( void ) { LeaveCriticalSection((CRITICAL_SECTION*)mutex); }
+}
+#endif
+
+#if defined(__UNIX__)
+#include <pthread.h>
+
+namespace gbe
+{
+ /*! system mutex using pthreads */
+ MutexSys::MutexSys( void ) { mutex = new pthread_mutex_t; pthread_mutex_init((pthread_mutex_t*)mutex, NULL); }
+ MutexSys::~MutexSys( void ) { pthread_mutex_destroy((pthread_mutex_t*)mutex); delete ((pthread_mutex_t*)mutex); }
+ void MutexSys::lock( void ) { pthread_mutex_lock((pthread_mutex_t*)mutex); }
+ void MutexSys::unlock( void ) { pthread_mutex_unlock((pthread_mutex_t*)mutex); }
+}
+#endif
+
diff --git a/backend/src/sys/mutex.hpp b/backend/src/sys/mutex.hpp
new file mode 100644
index 0000000..c8e3f2f
--- /dev/null
+++ b/backend/src/sys/mutex.hpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef __GBE_MUTEX_HPP__
+#define __GBE_MUTEX_HPP__
+
+#include "platform.hpp"
+#include "atomic.hpp"
+#include <xmmintrin.h>
+
+namespace gbe
+{
+ class MutexSys {
+ friend class ConditionSys;
+ public:
+ MutexSys(void);
+ ~MutexSys(void);
+ void lock(void);
+ void unlock(void);
+ protected:
+ void* mutex;
+ MutexSys(const MutexSys&); // don't implement
+ MutexSys& operator= (const MutexSys&); // don't implement
+ GBE_CLASS(MutexSys);
+ };
+
+ /*! active mutex */
+ class MutexActive {
+ public:
+ INLINE MutexActive(void) : flag(LOCK_IS_FREE) {}
+ INLINE void lock(void) {
+ GBE_COMPILER_READ_BARRIER;
+ while (cmpxchg(flag, LOCK_IS_TAKEN, LOCK_IS_FREE) != LOCK_IS_FREE)
+ _mm_pause();
+ GBE_COMPILER_READ_BARRIER;
+ }
+ INLINE void unlock(void) { flag.storeRelease(LOCK_IS_FREE); }
+ protected:
+ enum { LOCK_IS_FREE = 0, LOCK_IS_TAKEN = 1 };
+ Atomic flag;
+ MutexActive(const MutexActive&); // don't implement
+ MutexActive& operator=(const MutexActive&); // don't implement
+ GBE_CLASS(MutexActive);
+ };
+
+ /*! safe mutex lock and unlock helper */
+ template<typename Mutex> class Lock {
+ public:
+ Lock (Mutex& mutex) : mutex(mutex) { mutex.lock(); }
+ ~Lock() { mutex.unlock(); }
+ protected:
+ Mutex& mutex;
+ Lock(const Lock&); // don't implement
+ Lock& operator= (const Lock&); // don't implement
+ GBE_CLASS(Lock);
+ };
+}
+
+#endif /* __GBE_MUTEX_HPP__ */
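
The Lock helper above is a scoped RAII guard around either mutex flavor; a brief sketch of the intended pattern (the counter and function names are illustrative):

    #include "sys/mutex.hpp"

    static gbe::MutexSys counterMutex;
    static int counter = 0;

    void increment(void) {
      gbe::Lock<gbe::MutexSys> guard(counterMutex); // locks here
      counter++;
    }                                               // unlocks when guard leaves scope, on every exit path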
diff --git a/backend/src/sys/platform.cpp b/backend/src/sys/platform.cpp
new file mode 100644
index 0000000..95768ee
--- /dev/null
+++ b/backend/src/sys/platform.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "sys/platform.hpp"
+#include "sys/intrinsics.hpp"
+#include <string>
+
+////////////////////////////////////////////////////////////////////////////////
+/// Windows Platform
+////////////////////////////////////////////////////////////////////////////////
+
+#ifdef __WIN32__
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+namespace gbe
+{
+ double getSeconds() {
+ LARGE_INTEGER freq, val;
+ QueryPerformanceFrequency(&freq);
+ QueryPerformanceCounter(&val);
+ return (double)val.QuadPart / (double)freq.QuadPart;
+ }
+
+ void FATAL(const std::string &msg) {
+ std::cerr << msg << std::endl;
+ MessageBox(NULL, msg.c_str(), "Fatal Error", MB_OK | MB_ICONEXCLAMATION);
+ GBE_ASSERT(0);
+#ifdef __GNUC__
+ exit(-1);
+#else
+ _exit(-1);
+#endif /* __GNUC__ */
+ }
+
+} /* namespace gbe */
+#endif /* __WIN32__ */
+
+////////////////////////////////////////////////////////////////////////////////
+/// Unix Platform
+////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__UNIX__)
+
+#include <sys/time.h>
+#include <unistd.h>
+
+namespace gbe
+{
+ double getSeconds() {
+ struct timeval tp; gettimeofday(&tp,NULL);
+ return double(tp.tv_sec) + double(tp.tv_usec)/1E6;
+ }
+
+ void FATAL(const std::string &msg) {
+ std::cerr << msg << std::endl;
+ GBE_ASSERT(0);
+ _exit(-1);
+ }
+} /* namespace gbe */
+
+#endif /* __UNIX__ */
+
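
getSeconds() hides QueryPerformanceCounter (Windows) and gettimeofday (Unix) behind a single call returning seconds as a double; a typical timing sketch (illustrative only):

    #include "sys/platform.hpp"

    double timeIt(void (*fn)(void)) {
      const double start = gbe::getSeconds();
      fn();
      return gbe::getSeconds() - start;  // elapsed wall-clock time in seconds
    }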
diff --git a/backend/src/sys/platform.hpp b/backend/src/sys/platform.hpp
new file mode 100644
index 0000000..c52ae90
--- /dev/null
+++ b/backend/src/sys/platform.hpp
@@ -0,0 +1,390 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __GBE_PLATFORM_HPP__
+#define __GBE_PLATFORM_HPP__
+
+#include <cstddef>
+#include <cstdlib>
+#include <cstdio>
+#include <iostream>
+#include <cassert>
+#include <new>
+
+////////////////////////////////////////////////////////////////////////////////
+/// CPU architecture
+////////////////////////////////////////////////////////////////////////////////
+
+/* detect 32 or 64 platform */
+#if defined(__x86_64__) || defined(__ia64__) || defined(_M_X64)
+#define __X86_64__
+#else
+#define __X86__
+#endif
+
+/* We require SSE ... */
+#ifndef __SSE__
+#define __SSE__
+#endif
+
+/* ... and SSE2 */
+#ifndef __SSE2__
+#define __SSE2__
+#endif
+
+#if defined(_INCLUDED_IMM)
+// #define __AVX__
+#endif
+
+#if defined(_MSC_VER) && (_MSC_VER < 1600) && !defined(__INTEL_COMPILER) || defined(_DEBUG) && defined(_WIN32)
+#define __NO_AVX__
+#endif
+
+#if defined(_MSC_VER) && !defined(__SSE4_2__)
+// #define __SSE4_2__ //! activates SSE4.2 support
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+/// Operating system
+////////////////////////////////////////////////////////////////////////////////
+
+/* detect Linux platform */
+#if defined(linux) || defined(__linux__) || defined(__LINUX__)
+# if !defined(__LINUX__)
+# define __LINUX__
+# endif
+# if !defined(__UNIX__)
+# define __UNIX__
+# endif
+#endif
+
+/* detect FreeBSD platform */
+#if defined(__FreeBSD__) || defined(__FREEBSD__)
+# if !defined(__FREEBSD__)
+# define __FREEBSD__
+# endif
+# if !defined(__UNIX__)
+# define __UNIX__
+# endif
+#endif
+
+/* detect Windows 95/98/NT/2000/XP/Vista/7 platform */
+#if (defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__NT__)) && !defined(__CYGWIN__)
+# if !defined(__WIN32__)
+# define __WIN32__
+# endif
+#endif
+
+/* detect Cygwin platform */
+#if defined(__CYGWIN__)
+# if !defined(__UNIX__)
+# define __UNIX__
+# endif
+#endif
+
+/* detect MAC OS X platform */
+#if defined(__APPLE__) || defined(MACOSX) || defined(__MACOSX__)
+# if !defined(__MACOSX__)
+# define __MACOSX__
+# endif
+# if !defined(__UNIX__)
+# define __UNIX__
+# endif
+#endif
+
+/* try to detect other Unix systems */
+#if defined(__unix__) || defined (unix) || defined(__unix) || defined(_unix)
+# if !defined(__UNIX__)
+# define __UNIX__
+# endif
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+/// Compiler
+////////////////////////////////////////////////////////////////////////////////
+
+/*! GCC compiler */
+#ifdef __GNUC__
+// #define __GNUC__
+#endif
+
+/*! Intel compiler */
+#ifdef __INTEL_COMPILER
+#define __ICC__
+#endif
+
+/*! Visual C compiler */
+#ifdef _MSC_VER
+#define __MSVC__
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+/// Macros
+////////////////////////////////////////////////////////////////////////////////
+
+#ifdef __WIN32__
+#define __dllexport extern "C" __declspec(dllexport)
+#define __dllimport extern "C" __declspec(dllimport)
+#else
+#define __dllexport extern "C"
+#define __dllimport extern "C"
+#endif
+
+#ifdef __MSVC__
+#undef NOINLINE
+#define NOINLINE __declspec(noinline)
+#define INLINE __forceinline
+#define RESTRICT __restrict
+#define THREAD __declspec(thread)
+#define ALIGNED(...) __declspec(align(__VA_ARGS__))
+//#define __FUNCTION__ __FUNCTION__
+#define DEBUGBREAK() __debugbreak()
+#else
+#undef NOINLINE
+#undef INLINE
+#define NOINLINE __attribute__((noinline))
+#define INLINE inline __attribute__((always_inline))
+#define RESTRICT __restrict
+#define THREAD __thread
+#define ALIGNED(...) __attribute__((aligned(__VA_ARGS__)))
+#define __FUNCTION__ __PRETTY_FUNCTION__
+#define DEBUGBREAK() asm ("int $3")
+#endif
+
+/*! Modern x86 processors */
+#define CACHE_LINE 64
+#define CACHE_LINE_ALIGNED ALIGNED(CACHE_LINE)
+
+#ifdef __GNUC__
+ #define MAYBE_UNUSED __attribute__((unused))
+#else
+ #define MAYBE_UNUSED
+#endif
+
+#if defined(_MSC_VER)
+#define __builtin_expect(expr,b) expr
+#endif
+
+/*! Debug syntactic sugar */
+#if GBE_DEBUG
+#define IF_DEBUG(EXPR) EXPR
+#else
+#define IF_DEBUG(EXPR)
+#endif /* GBE_DEBUG */
+
+/*! Debug printing macros */
+#define STRING(x) #x
+#define PING std::cout << __FILE__ << " (" << __LINE__ << "): " << __FUNCTION__ << std::endl
+#define PRINT(x) std::cout << STRING(x) << " = " << (x) << std::endl
+
+/*! Branch hint */
+#define LIKELY(x) __builtin_expect(!!(x),1)
+#define UNLIKELY(x) __builtin_expect((x),0)
+
+/*! Stringify macros */
+#define JOIN(X, Y) _DO_JOIN(X, Y)
+#define _DO_JOIN(X, Y) _DO_JOIN2(X, Y)
+#define _DO_JOIN2(X, Y) X##Y
+
+/*! Run-time assertion */
+#if GBE_DEBUG
+#define GBE_ASSERT(EXPR) do { \
+ if (UNLIKELY(!(EXPR))) \
+ gbe::onFailedAssertion(#EXPR, __FILE__, __FUNCTION__, __LINE__); \
+} while (0)
+#define GBE_ASSERTM(EXPR, MSG) do { \
+ if (UNLIKELY(!(EXPR))) \
+ gbe::onFailedAssertion(MSG, __FILE__, __FUNCTION__, __LINE__); \
+} while (0)
+#else
+#define GBE_ASSERT(EXPR) do { } while (0)
+#define GBE_ASSERTM(EXPR, MSG) do { } while (0)
+#endif /* GBE_DEBUG */
+
+#define NOT_IMPLEMENTED GBE_ASSERTM (false, "Not implemented")
+#define NOT_SUPPORTED GBE_ASSERTM (false, "Not supported")
+
+/*! Fatal error macros */
+#define FATAL_IF(COND, MSG) \
+do { \
+ if(UNLIKELY(COND)) FATAL(MSG); \
+} while (0)
+
+/* Safe deletion macros */
+#define GBE_SAFE_DELETE_ARRAY(x) do { if (x != NULL) GBE_DELETE_ARRAY(x); } while (0)
+#define GBE_SAFE_DELETE(x) do { if (x != NULL) GBE_DELETE(x); } while (0)
+
+/* Number of elements in an array */
+#define ARRAY_ELEM_NUM(x) (sizeof(x) / sizeof(x[0]))
+
+/* Align X on A */
+#define ALIGN(X,A) (((X) % (A)) ? ((X) + (A) - ((X) % (A))) : (X))
+
+/*! Produce a string from the macro location */
+#define HERE (STRING(__LINE__) "@" __FILE__)
+
+/*! Typesafe encapsulation of a type (mostly for integers) */
+#define TYPE_SAFE(SAFE, UNSAFE) \
+class SAFE \
+{ \
+public: \
+ INLINE SAFE(void) {} \
+ explicit INLINE SAFE(UNSAFE unsafe) : unsafe(unsafe) {} \
+ INLINE operator UNSAFE (void) const { return unsafe; } \
+ UNSAFE value(void) const { return unsafe; } \
+private: \
+ UNSAFE unsafe; \
+};
+
+/*! Default alignment for the platform */
+#define GBE_DEFAULT_ALIGNMENT 16
+
+/*! Useful constants */
+#define KB 1024
+#define MB (KB*KB)
+
+/*! Portable AlignOf */
+template <typename T>
+struct AlignOf {
+ struct Helper { char x; T t; };
+ enum { value = offsetof(Helper, t) };
+};
+
+////////////////////////////////////////////////////////////////////////////////
+/// Visibility parameters (DLL export and so on)
+////////////////////////////////////////////////////////////////////////////////
+#if defined __WIN32__
+ #if defined __GNUC__
+ #define GBE_EXPORT_SYMBOL __attribute__ ((dllexport))
+ #define GBE_IMPORT_SYMBOL __attribute__ ((dllimport))
+ #else
+ #define GBE_IMPORT_SYMBOL __declspec(dllimport)
+ #define GBE_EXPORT_SYMBOL __declspec(dllexport)
+ #endif /* __GNUC__ */
+#else
+ #define GBE_EXPORT_SYMBOL __attribute__ ((visibility ("default")))
+ #define GBE_IMPORT_SYMBOL
+#endif /* __WIN32__ */
+
+////////////////////////////////////////////////////////////////////////////////
+/// Basic Types
+////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__MSVC__)
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+typedef __int32 int32_t;
+typedef unsigned __int32 uint32_t;
+typedef __int16 int16_t;
+typedef unsigned __int16 uint16_t;
+typedef __int8 int8_t;
+typedef unsigned __int8 uint8_t;
+#else
+#include <cstdint>
+#endif
+
+#if defined(__X86_64__)
+typedef int64_t index_t;
+#else
+typedef int32_t index_t;
+#endif
+
+/*! To protect some classes from being copied */
+class NonCopyable
+{
+protected:
+ INLINE NonCopyable(void) {}
+ INLINE ~NonCopyable(void) {}
+private:
+ INLINE NonCopyable(const NonCopyable&) {}
+ INLINE NonCopyable& operator= (const NonCopyable&) {return *this;}
+};
+
+////////////////////////////////////////////////////////////////////////////////
+/// Disable some compiler warnings
+////////////////////////////////////////////////////////////////////////////////
+
+#ifdef __ICC__
+#pragma warning(disable:265) // floating-point operation result is out of range
+#pragma warning(disable:383) // value copied to temporary, reference to temporary used
+#pragma warning(disable:869) // parameter was never referenced
+#pragma warning(disable:981) // operands are evaluated in unspecified order
+#pragma warning(disable:1418) // external function definition with no prior declaration
+#pragma warning(disable:1419) // external declaration in primary source file
+#pragma warning(disable:1572) // floating-point equality and inequality comparisons are unreliable
+#pragma warning(disable:1125) // virtual function override intended?
+#endif /* __ICC__ */
+
+////////////////////////////////////////////////////////////////////////////////
+/// Default Includes and Functions
+////////////////////////////////////////////////////////////////////////////////
+
+#include "sys/alloc.hpp"
+
+namespace gbe
+{
+ /*! selects */
+ INLINE bool select(bool s, bool t , bool f) { return s ? t : f; }
+ INLINE int select(bool s, int t, int f) { return s ? t : f; }
+ INLINE float select(bool s, float t, float f) { return s ? t : f; }
+
+ /*! Fatal error function */
+ void FATAL(const std::string&);
+
+ /*! Return the next power of 2 */
+ INLINE uint32_t nextHighestPowerOf2(uint32_t x) {
+ x--;
+ x |= x >> 1;
+ x |= x >> 2;
+ x |= x >> 4;
+ x |= x >> 8;
+ x |= x >> 16;
+ return ++x;
+ }
+
+ INLINE uint32_t logi2(uint32_t x) {
+ uint32_t r = 0;
+ while(x >>= 1) r++;
+ return r;
+ }
+
+ template<uint32_t N>
+ INLINE uint32_t isPowerOf(uint32_t i) {
+ while (i > 1) {
+ if (i%N) return false;
+ i = i/N;
+ }
+ return true;
+ }
+ template<> INLINE uint32_t isPowerOf<2>(uint32_t i) { return ((i-1)&i) == 0; }
+
+ /*! random functions */
+ template<typename T> T random() { return T(0); }
+ template<> INLINE int32_t random() { return int(rand()); }
+ template<> INLINE uint32_t random() { return uint32_t(rand()); }
+ template<> INLINE float random() { return random<uint32_t>()/float(RAND_MAX); }
+ template<> INLINE double random() { return random<uint32_t>()/double(RAND_MAX); }
+
+ /** returns performance counter in seconds */
+ double getSeconds();
+
+} /* namespace gbe */
+
+#endif /* __GBE_PLATFORM_HPP__ */
+
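
A hedged sketch of a few of the utilities above in action; RegisterIndex is a made-up name used only to illustrate TYPE_SAFE:

    #include "sys/platform.hpp"

    TYPE_SAFE(RegisterIndex, uint16_t)   // strongly typed wrapper around a raw uint16_t

    void platform_sketch(void) {
      const RegisterIndex reg(uint16_t(3));  // explicit: no silent conversion from plain integers
      const uint16_t raw = reg;              // but conversion back to the raw type is implicit
      GBE_ASSERT(gbe::nextHighestPowerOf2(raw) == 4u);
      GBE_ASSERT(gbe::isPowerOf<2>(64) && !gbe::isPowerOf<2>(48));
      GBE_ASSERT(gbe::logi2(64) == 6u);
      (void) raw;                            // keep release builds (no-op GBE_ASSERT) warning-free
    }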
diff --git a/backend/src/sys/set.hpp b/backend/src/sys/set.hpp
new file mode 100644
index 0000000..db68807
--- /dev/null
+++ b/backend/src/sys/set.hpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file set.hpp
+ *
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#ifndef __GBE_SET_HPP__
+#define __GBE_SET_HPP__
+
+#include "sys/platform.hpp"
+#include <set>
+
+namespace gbe
+{
+ /*! Add our custom allocator to std::set */
+ template<class Key, class Pred = std::less<Key>>
+ class set : public std::set<Key,Pred,Allocator<Key>>, public NonCopyable
+ {
+ public:
+ // Typedefs
+ typedef Key value_type;
+ typedef Allocator<value_type> allocator_type;
+ typedef std::set<Key,Pred,Allocator<Key>> parent_type;
+ typedef Key key_type;
+ typedef Pred key_compare;
+
+ /*! Default constructor */
+ INLINE set(const key_compare &comp = key_compare(),
+ const allocator_type &a = allocator_type()) :
+ parent_type(comp, a) {}
+ /*! Iteration constructor */
+ template<class InputIterator>
+ INLINE set(InputIterator first,
+ InputIterator last,
+ const key_compare &comp = key_compare(),
+ const allocator_type& a = allocator_type()) :
+ parent_type(first, last, comp, a) {}
+#if 0
+ /*! Copy constructor */
+ INLINE set(const set& x) : parent_type(x) {}
+#endif
+ /*! Better than using find if we do not care about the iterator itself */
+ INLINE bool contains(const Key &key) const {
+ return this->find(key) != this->end();
+ }
+ GBE_CLASS(set);
+ };
+
+} /* namespace gbe */
+
+#endif /* __GBE_SET_HPP__ */
+
diff --git a/backend/src/sys/vector.hpp b/backend/src/sys/vector.hpp
new file mode 100644
index 0000000..dc89991
--- /dev/null
+++ b/backend/src/sys/vector.hpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file vector.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __GBE_VECTOR_HPP__
+#define __GBE_VECTOR_HPP__
+
+#include "sys/platform.hpp"
+#include <vector>
+
+namespace gbe
+{
+ /*! Add bound checks to the standard vector class and use the internal
+ * allocator
+ */
+ template<class T>
+ class vector : public std::vector<T, Allocator<T>>
+ {
+ public:
+ // Typedefs
+ typedef std::vector<T, Allocator<T>> parent_type;
+ typedef Allocator<T> allocator_type;
+ typedef typename allocator_type::size_type size_type;
+ typedef typename parent_type::iterator iterator;
+
+ /*! Default constructor */
+ INLINE explicit vector(const allocator_type &a = allocator_type()) :
+ parent_type(a) {}
+#if 0
+ /*! Copy constructor */
+ INLINE vector(const vector &x) : parent_type(x) {}
+#endif
+ /*! Repetitive sequence constructor */
+ INLINE explicit vector(size_type n,
+ const T& value= T(),
+ const allocator_type &a = allocator_type()) :
+ parent_type(n, value, a) {}
+ /*! Iteration constructor */
+ template <class InputIterator>
+ INLINE vector(InputIterator first,
+ InputIterator last,
+ const allocator_type &a = allocator_type()) :
+ parent_type(first, last, a) {}
+ /*! Get element at position index (with a bound check) */
+ T &operator[] (size_t index) {
+ GBE_ASSERT(index < this->size());
+ return parent_type::operator[] (index);
+ }
+ /*! Get element at position index (with a bound check) */
+ const T &operator[] (size_t index) const {
+ GBE_ASSERT(index < this->size());
+ return parent_type::operator[] (index);
+ }
+ GBE_CLASS(vector);
+ };
+} /* namespace gbe */
+
+#endif /* __GBE_VECTOR_HPP__ */
+
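
The only behavioral difference from std::vector is the bound check in operator[]; a brief, illustrative sketch:

    #include "sys/vector.hpp"

    void vector_sketch(void) {
      gbe::vector<int> v(4u, 0);  // four zero-initialized elements, gbe allocator underneath
      v[0] = 42;                  // in range: behaves exactly like std::vector
      // v[4] = 0;                // would trip GBE_ASSERT(index < this->size()) in debug builds
    }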
diff --git a/include/CL/cl.h b/include/CL/cl.h
new file mode 100644
index 0000000..8201afc
--- /dev/null
+++ b/include/CL/cl.h
@@ -0,0 +1,994 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/* $Revision: 11985 $ on $Date: 2010-07-15 11:16:06 -0700 (Thu, 15 Jul 2010) $ */
+
+#ifndef __OPENCL_CL_H
+#define __OPENCL_CL_H
+
+#ifdef __APPLE__
+#include <OpenCL/cl_platform.h>
+#else
+#include <CL/cl_platform.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************/
+
+typedef struct _cl_platform_id * cl_platform_id;
+typedef struct _cl_device_id * cl_device_id;
+typedef struct _cl_context * cl_context;
+typedef struct _cl_command_queue * cl_command_queue;
+typedef struct _cl_mem * cl_mem;
+typedef struct _cl_program * cl_program;
+typedef struct _cl_kernel * cl_kernel;
+typedef struct _cl_event * cl_event;
+typedef struct _cl_sampler * cl_sampler;
+
+typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
+typedef cl_ulong cl_bitfield;
+typedef cl_bitfield cl_device_type;
+typedef cl_uint cl_platform_info;
+typedef cl_uint cl_device_info;
+typedef cl_bitfield cl_device_fp_config;
+typedef cl_uint cl_device_mem_cache_type;
+typedef cl_uint cl_device_local_mem_type;
+typedef cl_bitfield cl_device_exec_capabilities;
+typedef cl_bitfield cl_command_queue_properties;
+
+typedef intptr_t cl_context_properties;
+typedef cl_uint cl_context_info;
+typedef cl_uint cl_command_queue_info;
+typedef cl_uint cl_channel_order;
+typedef cl_uint cl_channel_type;
+typedef cl_bitfield cl_mem_flags;
+typedef cl_uint cl_mem_type;
+typedef cl_uint cl_mem_info;
+typedef cl_uint cl_image_info;
+typedef cl_uint cl_buffer_create_type;
+typedef cl_uint cl_addressing_mode;
+typedef cl_uint cl_filter_mode;
+typedef cl_uint cl_sampler_info;
+typedef cl_bitfield cl_map_flags;
+typedef cl_uint cl_program_info;
+typedef cl_uint cl_program_build_info;
+typedef cl_int cl_build_status;
+typedef cl_uint cl_kernel_info;
+typedef cl_uint cl_kernel_work_group_info;
+typedef cl_uint cl_event_info;
+typedef cl_uint cl_command_type;
+typedef cl_uint cl_profiling_info;
+
+typedef struct _cl_image_format {
+ cl_channel_order image_channel_order;
+ cl_channel_type image_channel_data_type;
+} cl_image_format;
+
+
+typedef struct _cl_buffer_region {
+ size_t origin;
+ size_t size;
+} cl_buffer_region;
+
+/******************************************************************************/
+
+/* Error Codes */
+#define CL_SUCCESS 0
+#define CL_DEVICE_NOT_FOUND -1
+#define CL_DEVICE_NOT_AVAILABLE -2
+#define CL_COMPILER_NOT_AVAILABLE -3
+#define CL_MEM_OBJECT_ALLOCATION_FAILURE            -4
+#define CL_OUT_OF_RESOURCES -5
+#define CL_OUT_OF_HOST_MEMORY -6
+#define CL_PROFILING_INFO_NOT_AVAILABLE -7
+#define CL_MEM_COPY_OVERLAP -8
+#define CL_IMAGE_FORMAT_MISMATCH -9
+#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10
+#define CL_BUILD_PROGRAM_FAILURE -11
+#define CL_MAP_FAILURE -12
+#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13
+#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
+
+#define CL_INVALID_VALUE -30
+#define CL_INVALID_DEVICE_TYPE -31
+#define CL_INVALID_PLATFORM -32
+#define CL_INVALID_DEVICE -33
+#define CL_INVALID_CONTEXT -34
+#define CL_INVALID_QUEUE_PROPERTIES -35
+#define CL_INVALID_COMMAND_QUEUE -36
+#define CL_INVALID_HOST_PTR -37
+#define CL_INVALID_MEM_OBJECT                       -38
+#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39
+#define CL_INVALID_IMAGE_SIZE -40
+#define CL_INVALID_SAMPLER -41
+#define CL_INVALID_BINARY -42
+#define CL_INVALID_BUILD_OPTIONS -43
+#define CL_INVALID_PROGRAM -44
+#define CL_INVALID_PROGRAM_EXECUTABLE -45
+#define CL_INVALID_KERNEL_NAME -46
+#define CL_INVALID_KERNEL_DEFINITION -47
+#define CL_INVALID_KERNEL -48
+#define CL_INVALID_ARG_INDEX -49
+#define CL_INVALID_ARG_VALUE -50
+#define CL_INVALID_ARG_SIZE -51
+#define CL_INVALID_KERNEL_ARGS -52
+#define CL_INVALID_WORK_DIMENSION -53
+#define CL_INVALID_WORK_GROUP_SIZE -54
+#define CL_INVALID_WORK_ITEM_SIZE -55
+#define CL_INVALID_GLOBAL_OFFSET -56
+#define CL_INVALID_EVENT_WAIT_LIST -57
+#define CL_INVALID_EVENT -58
+#define CL_INVALID_OPERATION -59
+#define CL_INVALID_GL_OBJECT -60
+#define CL_INVALID_BUFFER_SIZE -61
+#define CL_INVALID_MIP_LEVEL -62
+#define CL_INVALID_GLOBAL_WORK_SIZE -63
+#define CL_INVALID_PROPERTY -64
+
+/* OpenCL Version */
+#define CL_VERSION_1_0 1
+#define CL_VERSION_1_1 1
+
+/* cl_bool */
+#define CL_FALSE 0
+#define CL_TRUE 1
+
+/* cl_platform_info */
+#define CL_PLATFORM_PROFILE 0x0900
+#define CL_PLATFORM_VERSION 0x0901
+#define CL_PLATFORM_NAME 0x0902
+#define CL_PLATFORM_VENDOR 0x0903
+#define CL_PLATFORM_EXTENSIONS 0x0904
+
+/* cl_device_type - bitfield */
+#define CL_DEVICE_TYPE_DEFAULT (1 << 0)
+#define CL_DEVICE_TYPE_CPU (1 << 1)
+#define CL_DEVICE_TYPE_GPU (1 << 2)
+#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3)
+#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF
+
+/* cl_device_info */
+#define CL_DEVICE_TYPE 0x1000
+#define CL_DEVICE_VENDOR_ID 0x1001
+#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002
+#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003
+#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004
+#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B
+#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C
+#define CL_DEVICE_ADDRESS_BITS 0x100D
+#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E
+#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F
+#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010
+#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011
+#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012
+#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013
+#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014
+#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015
+#define CL_DEVICE_IMAGE_SUPPORT 0x1016
+#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017
+#define CL_DEVICE_MAX_SAMPLERS 0x1018
+#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019
+#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A
+#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B
+#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C
+#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D
+#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E
+#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F
+#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020
+#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021
+#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022
+#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023
+#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024
+#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025
+#define CL_DEVICE_ENDIAN_LITTLE 0x1026
+#define CL_DEVICE_AVAILABLE 0x1027
+#define CL_DEVICE_COMPILER_AVAILABLE 0x1028
+#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029
+#define CL_DEVICE_QUEUE_PROPERTIES 0x102A
+#define CL_DEVICE_NAME 0x102B
+#define CL_DEVICE_VENDOR 0x102C
+#define CL_DRIVER_VERSION 0x102D
+#define CL_DEVICE_PROFILE 0x102E
+#define CL_DEVICE_VERSION 0x102F
+#define CL_DEVICE_EXTENSIONS 0x1030
+#define CL_DEVICE_PLATFORM 0x1031
+/* 0x1032 reserved for CL_DEVICE_DOUBLE_FP_CONFIG */
+/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034
+#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C
+#define CL_DEVICE_OPENCL_C_VERSION 0x103D
+
+/* cl_device_fp_config - bitfield */
+#define CL_FP_DENORM (1 << 0)
+#define CL_FP_INF_NAN (1 << 1)
+#define CL_FP_ROUND_TO_NEAREST (1 << 2)
+#define CL_FP_ROUND_TO_ZERO (1 << 3)
+#define CL_FP_ROUND_TO_INF (1 << 4)
+#define CL_FP_FMA (1 << 5)
+#define CL_FP_SOFT_FLOAT (1 << 6)
+
+/* cl_device_mem_cache_type */
+#define CL_NONE 0x0
+#define CL_READ_ONLY_CACHE 0x1
+#define CL_READ_WRITE_CACHE 0x2
+
+/* cl_device_local_mem_type */
+#define CL_LOCAL 0x1
+#define CL_GLOBAL 0x2
+
+/* cl_device_exec_capabilities - bitfield */
+#define CL_EXEC_KERNEL (1 << 0)
+#define CL_EXEC_NATIVE_KERNEL (1 << 1)
+
+/* cl_command_queue_properties - bitfield */
+#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0)
+#define CL_QUEUE_PROFILING_ENABLE (1 << 1)
+
+/* cl_context_info */
+#define CL_CONTEXT_REFERENCE_COUNT 0x1080
+#define CL_CONTEXT_DEVICES 0x1081
+#define CL_CONTEXT_PROPERTIES 0x1082
+#define CL_CONTEXT_NUM_DEVICES 0x1083
+
+/* cl_context_info + cl_context_properties */
+#define CL_CONTEXT_PLATFORM 0x1084
+
+/* cl_command_queue_info */
+#define CL_QUEUE_CONTEXT 0x1090
+#define CL_QUEUE_DEVICE 0x1091
+#define CL_QUEUE_REFERENCE_COUNT 0x1092
+#define CL_QUEUE_PROPERTIES 0x1093
+
+/* cl_mem_flags - bitfield */
+#define CL_MEM_READ_WRITE (1 << 0)
+#define CL_MEM_WRITE_ONLY (1 << 1)
+#define CL_MEM_READ_ONLY (1 << 2)
+#define CL_MEM_USE_HOST_PTR (1 << 3)
+#define CL_MEM_ALLOC_HOST_PTR (1 << 4)
+#define CL_MEM_COPY_HOST_PTR (1 << 5)
+
+/* cl_channel_order */
+#define CL_R 0x10B0
+#define CL_A 0x10B1
+#define CL_RG 0x10B2
+#define CL_RA 0x10B3
+#define CL_RGB 0x10B4
+#define CL_RGBA 0x10B5
+#define CL_BGRA 0x10B6
+#define CL_ARGB 0x10B7
+#define CL_INTENSITY 0x10B8
+#define CL_LUMINANCE 0x10B9
+#define CL_Rx 0x10BA
+#define CL_RGx 0x10BB
+#define CL_RGBx 0x10BC
+
+/* cl_channel_type */
+#define CL_SNORM_INT8 0x10D0
+#define CL_SNORM_INT16 0x10D1
+#define CL_UNORM_INT8 0x10D2
+#define CL_UNORM_INT16 0x10D3
+#define CL_UNORM_SHORT_565 0x10D4
+#define CL_UNORM_SHORT_555 0x10D5
+#define CL_UNORM_INT_101010 0x10D6
+#define CL_SIGNED_INT8 0x10D7
+#define CL_SIGNED_INT16 0x10D8
+#define CL_SIGNED_INT32 0x10D9
+#define CL_UNSIGNED_INT8 0x10DA
+#define CL_UNSIGNED_INT16 0x10DB
+#define CL_UNSIGNED_INT32 0x10DC
+#define CL_HALF_FLOAT 0x10DD
+#define CL_FLOAT 0x10DE
+
+/* cl_mem_type */
+#define CL_MEM_OBJECT_BUFFER 0x10F0
+#define CL_MEM_OBJECT_IMAGE2D 0x10F1
+#define CL_MEM_OBJECT_IMAGE3D 0x10F2
+
+/* cl_mem_info */
+#define CL_MEM_TYPE 0x1100
+#define CL_MEM_FLAGS 0x1101
+#define CL_MEM_SIZE 0x1102
+#define CL_MEM_HOST_PTR 0x1103
+#define CL_MEM_MAP_COUNT 0x1104
+#define CL_MEM_REFERENCE_COUNT 0x1105
+#define CL_MEM_CONTEXT 0x1106
+#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107
+#define CL_MEM_OFFSET 0x1108
+
+/* cl_image_info */
+#define CL_IMAGE_FORMAT 0x1110
+#define CL_IMAGE_ELEMENT_SIZE 0x1111
+#define CL_IMAGE_ROW_PITCH 0x1112
+#define CL_IMAGE_SLICE_PITCH 0x1113
+#define CL_IMAGE_WIDTH 0x1114
+#define CL_IMAGE_HEIGHT 0x1115
+#define CL_IMAGE_DEPTH 0x1116
+
+/* cl_addressing_mode */
+#define CL_ADDRESS_NONE 0x1130
+#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131
+#define CL_ADDRESS_CLAMP 0x1132
+#define CL_ADDRESS_REPEAT 0x1133
+#define CL_ADDRESS_MIRRORED_REPEAT 0x1134
+
+/* cl_filter_mode */
+#define CL_FILTER_NEAREST 0x1140
+#define CL_FILTER_LINEAR 0x1141
+
+/* cl_sampler_info */
+#define CL_SAMPLER_REFERENCE_COUNT 0x1150
+#define CL_SAMPLER_CONTEXT 0x1151
+#define CL_SAMPLER_NORMALIZED_COORDS 0x1152
+#define CL_SAMPLER_ADDRESSING_MODE 0x1153
+#define CL_SAMPLER_FILTER_MODE 0x1154
+
+/* cl_map_flags - bitfield */
+#define CL_MAP_READ (1 << 0)
+#define CL_MAP_WRITE (1 << 1)
+
+/* cl_program_info */
+#define CL_PROGRAM_REFERENCE_COUNT 0x1160
+#define CL_PROGRAM_CONTEXT 0x1161
+#define CL_PROGRAM_NUM_DEVICES 0x1162
+#define CL_PROGRAM_DEVICES 0x1163
+#define CL_PROGRAM_SOURCE 0x1164
+#define CL_PROGRAM_BINARY_SIZES 0x1165
+#define CL_PROGRAM_BINARIES 0x1166
+
+/* cl_program_build_info */
+#define CL_PROGRAM_BUILD_STATUS 0x1181
+#define CL_PROGRAM_BUILD_OPTIONS 0x1182
+#define CL_PROGRAM_BUILD_LOG 0x1183
+
+/* cl_build_status */
+#define CL_BUILD_SUCCESS 0
+#define CL_BUILD_NONE -1
+#define CL_BUILD_ERROR -2
+#define CL_BUILD_IN_PROGRESS -3
+
+/* cl_kernel_info */
+#define CL_KERNEL_FUNCTION_NAME 0x1190
+#define CL_KERNEL_NUM_ARGS 0x1191
+#define CL_KERNEL_REFERENCE_COUNT 0x1192
+#define CL_KERNEL_CONTEXT 0x1193
+#define CL_KERNEL_PROGRAM 0x1194
+
+/* cl_kernel_work_group_info */
+#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0
+#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1
+#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2
+#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
+#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4
+
+/* cl_event_info */
+#define CL_EVENT_COMMAND_QUEUE 0x11D0
+#define CL_EVENT_COMMAND_TYPE 0x11D1
+#define CL_EVENT_REFERENCE_COUNT 0x11D2
+#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3
+#define CL_EVENT_CONTEXT 0x11D4
+
+/* cl_command_type */
+#define CL_COMMAND_NDRANGE_KERNEL 0x11F0
+#define CL_COMMAND_TASK 0x11F1
+#define CL_COMMAND_NATIVE_KERNEL 0x11F2
+#define CL_COMMAND_READ_BUFFER 0x11F3
+#define CL_COMMAND_WRITE_BUFFER 0x11F4
+#define CL_COMMAND_COPY_BUFFER 0x11F5
+#define CL_COMMAND_READ_IMAGE 0x11F6
+#define CL_COMMAND_WRITE_IMAGE 0x11F7
+#define CL_COMMAND_COPY_IMAGE 0x11F8
+#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9
+#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA
+#define CL_COMMAND_MAP_BUFFER 0x11FB
+#define CL_COMMAND_MAP_IMAGE 0x11FC
+#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD
+#define CL_COMMAND_MARKER 0x11FE
+#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF
+#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200
+#define CL_COMMAND_READ_BUFFER_RECT 0x1201
+#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202
+#define CL_COMMAND_COPY_BUFFER_RECT 0x1203
+#define CL_COMMAND_USER 0x1204
+
+/* command execution status */
+#define CL_COMPLETE 0x0
+#define CL_RUNNING 0x1
+#define CL_SUBMITTED 0x2
+#define CL_QUEUED 0x3
+
+/* cl_buffer_create_type */
+#define CL_BUFFER_CREATE_TYPE_REGION 0x1220
+
+/* cl_profiling_info */
+#define CL_PROFILING_COMMAND_QUEUED 0x1280
+#define CL_PROFILING_COMMAND_SUBMIT 0x1281
+#define CL_PROFILING_COMMAND_START 0x1282
+#define CL_PROFILING_COMMAND_END 0x1283
+
+/********************************************************************************************************/
+
+/* Platform API */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetPlatformIDs(cl_uint /* num_entries */,
+ cl_platform_id * /* platforms */,
+ cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetPlatformInfo(cl_platform_id /* platform */,
+ cl_platform_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Device APIs */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceIDs(cl_platform_id /* platform */,
+ cl_device_type /* device_type */,
+ cl_uint /* num_entries */,
+ cl_device_id * /* devices */,
+ cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceInfo(cl_device_id /* device */,
+ cl_device_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Context APIs */
+extern CL_API_ENTRY cl_context CL_API_CALL
+clCreateContext(const cl_context_properties * /* properties */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* devices */,
+ void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
+ void * /* user_data */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_context CL_API_CALL
+clCreateContextFromType(const cl_context_properties * /* properties */,
+ cl_device_type /* device_type */,
+ void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *),
+ void * /* user_data */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetContextInfo(cl_context /* context */,
+ cl_context_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Command Queue APIs */
+extern CL_API_ENTRY cl_command_queue CL_API_CALL
+clCreateCommandQueue(cl_context /* context */,
+ cl_device_id /* device */,
+ cl_command_queue_properties /* properties */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetCommandQueueInfo(cl_command_queue /* command_queue */,
+ cl_command_queue_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
+#warning CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined. These APIs are unsupported and untested in OpenCL 1.1!
+/*
+ * WARNING:
+ * This API introduces mutable state into the OpenCL implementation. It has been REMOVED
+ * to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the
+ * OpenCL 1.1 conformance test, and consequently may not work or may not work dependably.
+ * It is likely to be non-performant. Use of this API is not advised. Use at your own risk.
+ *
+ * Software developers previously relying on this API are instructed to set the command queue
+ * properties when creating the queue, instead.
+ */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetCommandQueueProperty(cl_command_queue /* command_queue */,
+ cl_command_queue_properties /* properties */,
+ cl_bool /* enable */,
+ cl_command_queue_properties * /* old_properties */) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED;
+#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */
+
+/* Memory Object APIs */
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateBuffer(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ size_t /* size */,
+ void * /* host_ptr */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateSubBuffer(cl_mem /* buffer */,
+ cl_mem_flags /* flags */,
+ cl_buffer_create_type /* buffer_create_type */,
+ const void * /* buffer_create_info */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateImage2D(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ const cl_image_format * /* image_format */,
+ size_t /* image_width */,
+ size_t /* image_height */,
+ size_t /* image_row_pitch */,
+ void * /* host_ptr */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateImage3D(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ const cl_image_format * /* image_format */,
+ size_t /* image_width */,
+ size_t /* image_height */,
+ size_t /* image_depth */,
+ size_t /* image_row_pitch */,
+ size_t /* image_slice_pitch */,
+ void * /* host_ptr */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetSupportedImageFormats(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ cl_mem_type /* image_type */,
+ cl_uint /* num_entries */,
+ cl_image_format * /* image_formats */,
+ cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetMemObjectInfo(cl_mem /* memobj */,
+ cl_mem_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetImageInfo(cl_mem /* image */,
+ cl_image_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetMemObjectDestructorCallback( cl_mem /* memobj */,
+ void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
+ void * /*user_data */ ) CL_API_SUFFIX__VERSION_1_1;
+
+/* Sampler APIs */
+extern CL_API_ENTRY cl_sampler CL_API_CALL
+clCreateSampler(cl_context /* context */,
+ cl_bool /* normalized_coords */,
+ cl_addressing_mode /* addressing_mode */,
+ cl_filter_mode /* filter_mode */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetSamplerInfo(cl_sampler /* sampler */,
+ cl_sampler_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Program Object APIs */
+extern CL_API_ENTRY cl_program CL_API_CALL
+clCreateProgramWithSource(cl_context /* context */,
+ cl_uint /* count */,
+ const char ** /* strings */,
+ const size_t * /* lengths */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_program CL_API_CALL
+clCreateProgramWithBinary(cl_context /* context */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* device_list */,
+ const size_t * /* lengths */,
+ const unsigned char ** /* binaries */,
+ cl_int * /* binary_status */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clBuildProgram(cl_program /* program */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* device_list */,
+ const char * /* options */,
+ void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
+ void * /* user_data */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetProgramInfo(cl_program /* program */,
+ cl_program_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetProgramBuildInfo(cl_program /* program */,
+ cl_device_id /* device */,
+ cl_program_build_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Kernel Object APIs */
+extern CL_API_ENTRY cl_kernel CL_API_CALL
+clCreateKernel(cl_program /* program */,
+ const char * /* kernel_name */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCreateKernelsInProgram(cl_program /* program */,
+ cl_uint /* num_kernels */,
+ cl_kernel * /* kernels */,
+ cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetKernelArg(cl_kernel /* kernel */,
+ cl_uint /* arg_index */,
+ size_t /* arg_size */,
+ const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetKernelInfo(cl_kernel /* kernel */,
+ cl_kernel_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetKernelWorkGroupInfo(cl_kernel /* kernel */,
+ cl_device_id /* device */,
+ cl_kernel_work_group_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Event Object APIs */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clWaitForEvents(cl_uint /* num_events */,
+ const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetEventInfo(cl_event /* event */,
+ cl_event_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_event CL_API_CALL
+clCreateUserEvent(cl_context /* context */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetUserEventStatus(cl_event /* event */,
+ cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetEventCallback( cl_event /* event */,
+ cl_int /* command_exec_callback_type */,
+ void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
+ void * /* user_data */) CL_API_SUFFIX__VERSION_1_1;
+
+/* Profiling APIs */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetEventProfilingInfo(cl_event /* event */,
+ cl_profiling_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Flush and Finish APIs */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Enqueued Commands APIs */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReadBuffer(cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ cl_bool /* blocking_read */,
+ size_t /* offset */,
+ size_t /* cb */,
+ void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReadBufferRect(cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ cl_bool /* blocking_read */,
+ const size_t * /* buffer_origin */,
+ const size_t * /* host_origin */,
+ const size_t * /* region */,
+ size_t /* buffer_row_pitch */,
+ size_t /* buffer_slice_pitch */,
+ size_t /* host_row_pitch */,
+ size_t /* host_slice_pitch */,
+ void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWriteBuffer(cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ cl_bool /* blocking_write */,
+ size_t /* offset */,
+ size_t /* cb */,
+ const void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWriteBufferRect(cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ cl_bool /* blocking_write */,
+ const size_t * /* buffer_origin */,
+ const size_t * /* host_origin */,
+ const size_t * /* region */,
+ size_t /* buffer_row_pitch */,
+ size_t /* buffer_slice_pitch */,
+ size_t /* host_row_pitch */,
+ size_t /* host_slice_pitch */,
+ const void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyBuffer(cl_command_queue /* command_queue */,
+ cl_mem /* src_buffer */,
+ cl_mem /* dst_buffer */,
+ size_t /* src_offset */,
+ size_t /* dst_offset */,
+ size_t /* cb */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyBufferRect(cl_command_queue /* command_queue */,
+ cl_mem /* src_buffer */,
+ cl_mem /* dst_buffer */,
+ const size_t * /* src_origin */,
+ const size_t * /* dst_origin */,
+ const size_t * /* region */,
+ size_t /* src_row_pitch */,
+ size_t /* src_slice_pitch */,
+ size_t /* dst_row_pitch */,
+ size_t /* dst_slice_pitch */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReadImage(cl_command_queue /* command_queue */,
+ cl_mem /* image */,
+ cl_bool /* blocking_read */,
+ const size_t * /* origin[3] */,
+ const size_t * /* region[3] */,
+ size_t /* row_pitch */,
+ size_t /* slice_pitch */,
+ void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWriteImage(cl_command_queue /* command_queue */,
+ cl_mem /* image */,
+ cl_bool /* blocking_write */,
+ const size_t * /* origin[3] */,
+ const size_t * /* region[3] */,
+ size_t /* input_row_pitch */,
+ size_t /* input_slice_pitch */,
+ const void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyImage(cl_command_queue /* command_queue */,
+ cl_mem /* src_image */,
+ cl_mem /* dst_image */,
+ const size_t * /* src_origin[3] */,
+ const size_t * /* dst_origin[3] */,
+ const size_t * /* region[3] */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */,
+ cl_mem /* src_image */,
+ cl_mem /* dst_buffer */,
+ const size_t * /* src_origin[3] */,
+ const size_t * /* region[3] */,
+ size_t /* dst_offset */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyBufferToImage(cl_command_queue /* command_queue */,
+ cl_mem /* src_buffer */,
+ cl_mem /* dst_image */,
+ size_t /* src_offset */,
+ const size_t * /* dst_origin[3] */,
+ const size_t * /* region[3] */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY void * CL_API_CALL
+clEnqueueMapBuffer(cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ cl_bool /* blocking_map */,
+ cl_map_flags /* map_flags */,
+ size_t /* offset */,
+ size_t /* cb */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY void * CL_API_CALL
+clEnqueueMapImage(cl_command_queue /* command_queue */,
+ cl_mem /* image */,
+ cl_bool /* blocking_map */,
+ cl_map_flags /* map_flags */,
+ const size_t * /* origin[3] */,
+ const size_t * /* region[3] */,
+ size_t * /* image_row_pitch */,
+ size_t * /* image_slice_pitch */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueUnmapMemObject(cl_command_queue /* command_queue */,
+ cl_mem /* memobj */,
+ void * /* mapped_ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
+ cl_kernel /* kernel */,
+ cl_uint /* work_dim */,
+ const size_t * /* global_work_offset */,
+ const size_t * /* global_work_size */,
+ const size_t * /* local_work_size */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueTask(cl_command_queue /* command_queue */,
+ cl_kernel /* kernel */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueNativeKernel(cl_command_queue /* command_queue */,
+ void (*user_func)(void *),
+ void * /* args */,
+ size_t /* cb_args */,
+ cl_uint /* num_mem_objects */,
+ const cl_mem * /* mem_list */,
+ const void ** /* args_mem_loc */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueMarker(cl_command_queue /* command_queue */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWaitForEvents(cl_command_queue /* command_queue */,
+ cl_uint /* num_events */,
+ const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueBarrier(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
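By way of illustration (not taken from the imported header), a minimal sketch of how the enqueue entry points above chain together through their event parameters; the context, queue, kernel and buffer are assumed to exist already:

    #include <CL/cl.h>

    /* Sketch: chain a non-blocking write, a kernel launch and a blocking read
     * through the event wait-list parameters shown in the declarations above. */
    cl_int run_once(cl_command_queue queue, cl_kernel kernel, cl_mem buf,
                    const float *input, float *output, size_t n)
    {
        cl_event write_done, kernel_done;
        size_t global = n;
        cl_int err;

        err = clEnqueueWriteBuffer(queue, buf, CL_FALSE, 0, n * sizeof(float),
                                   input, 0, NULL, &write_done);
        if (err != CL_SUCCESS) return err;

        err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buf);
        if (err != CL_SUCCESS) return err;

        /* The kernel waits on the write through its event wait list. */
        err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL,
                                     1, &write_done, &kernel_done);
        if (err != CL_SUCCESS) return err;

        /* Blocking read: returns only after the kernel has finished. */
        err = clEnqueueReadBuffer(queue, buf, CL_TRUE, 0, n * sizeof(float),
                                  output, 1, &kernel_done, NULL);
        clReleaseEvent(write_done);
        clReleaseEvent(kernel_done);
        return err;
    }
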
+/* Extension function access
+ *
+ * Returns the extension function address for the given function name,
+ * or NULL if a valid function cannot be found. The client must
+ * check that the address is not NULL before using or calling the
+ * returned function address.
+ */
+extern CL_API_ENTRY void * CL_API_CALL clGetExtensionFunctionAddress(const char * /* func_name */) CL_API_SUFFIX__VERSION_1_0;
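As a hedged usage sketch (the name looked up here, clIcdGetPlatformIDsKHR, is the cl_khr_icd entry point declared later in cl_ext.h):

    #include <CL/cl.h>
    #include <CL/cl_ext.h>
    #include <stdio.h>

    int main(void)
    {
        /* clIcdGetPlatformIDsKHR_fn is the pointer type from CL/cl_ext.h;
         * NULL means this implementation does not expose the entry point. */
        clIcdGetPlatformIDsKHR_fn icd_get_platforms =
            (clIcdGetPlatformIDsKHR_fn)
                clGetExtensionFunctionAddress("clIcdGetPlatformIDsKHR");
        if (icd_get_platforms == NULL) {
            fprintf(stderr, "cl_khr_icd entry point not exposed\n");
            return 1;
        }

        cl_uint num_platforms = 0;
        if (icd_get_platforms(0, NULL, &num_platforms) == CL_SUCCESS)
            printf("%u platform(s) visible through the ICD loader\n",
                   (unsigned) num_platforms);
        return 0;
    }
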
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __OPENCL_CL_H */
+
diff --git a/include/CL/cl_d3d10.h b/include/CL/cl_d3d10.h
new file mode 100644
index 0000000..f36ca4b
--- /dev/null
+++ b/include/CL/cl_d3d10.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
+
+#ifndef __OPENCL_CL_D3D10_H
+#define __OPENCL_CL_D3D10_H
+
+#ifdef __D3D10__
+#include <d3d10.h>
+#endif
+#include <CL/cl.h>
+#include <CL/cl_platform.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************
+ * cl_khr_d3d10_sharing */
+#define cl_khr_d3d10_sharing 1
+
+typedef cl_uint cl_d3d10_device_source_khr;
+typedef cl_uint cl_d3d10_device_set_khr;
+
+/******************************************************************************/
+
+// Error Codes
+#define CL_INVALID_D3D10_DEVICE_KHR -1002
+#define CL_INVALID_D3D10_RESOURCE_KHR -1003
+#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004
+#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005
+
+// cl_d3d10_device_source_khr
+#define CL_D3D10_DEVICE_KHR 0x4010
+#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011
+
+// cl_d3d10_device_set_khr
+#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012
+#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013
+
+// cl_context_info
+#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014
+#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
+
+// cl_mem_info
+#define CL_MEM_D3D10_RESOURCE_KHR 0x4015
+
+// cl_image_info
+#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016
+
+// cl_command_type
+#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017
+#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018
+
+/******************************************************************************/
+#ifndef __D3D10__
+typedef struct {int i;} ID3D10Buffer;
+typedef struct {int i;} ID3D10Texture2D;
+typedef struct {int i;} ID3D10Texture3D;
+#endif
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)(
+ cl_platform_id platform,
+ cl_d3d10_device_source_khr d3d_device_source,
+ void * d3d_object,
+ cl_d3d10_device_set_khr d3d_device_set,
+ cl_uint num_entries,
+ cl_device_id * devices,
+ cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)(
+ cl_context context,
+ cl_mem_flags flags,
+ ID3D10Buffer * resource,
+ cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)(
+ cl_context context,
+ cl_mem_flags flags,
+ ID3D10Texture2D * resource,
+ UINT subresource,
+ cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)(
+ cl_context context,
+ cl_mem_flags flags,
+ ID3D10Texture3D * resource,
+ UINT subresource,
+ cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
+ cl_command_queue command_queue,
+ cl_uint num_objects,
+ const cl_mem * mem_objects,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
+ cl_command_queue command_queue,
+ cl_uint num_objects,
+ cl_mem * mem_objects,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event) CL_API_SUFFIX__VERSION_1_0;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __OPENCL_CL_D3D10_H
+
diff --git a/include/CL/cl_d3d9.h b/include/CL/cl_d3d9.h
new file mode 100644
index 0000000..babc611
--- /dev/null
+++ b/include/CL/cl_d3d9.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/* $Revision$ on $Date$ */
+
+#ifndef __OPENCL_CL_D3D9_H
+#define __OPENCL_CL_D3D9_H
+
+#include <CL/cl_platform.h>
+#include <d3d9.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* cl_khr_d3d9_sharing extension */
+#define cl_khr_d3d9_sharing 1
+
+/* cl_context_properties */
+#define CL_CONTEXT_D3D9_DEVICE 0x1085
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromD3D9BufferKHR(
+ cl_context /* context */,
+ cl_mem_flags /* flags */,
+ IDirect3DResource9 * /* resource */,
+ HANDLE /* shared_handle */,
+ cl_int * /* errcode_ret */);
+
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromD3D9TextureKHR(
+ cl_context /* context */,
+ cl_mem_flags /* flags */,
+ IDirect3DTexture9 * /* texture */,
+ HANDLE /* shared_handle */,
+ UINT /* miplevel */,
+ cl_int * /* errcode_ret */);
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromD3D9VolumeTextureKHR(
+ cl_context /* context */,
+ cl_mem_flags /* flags */,
+ IDirect3DVolumeTexture9 * /* resource */,
+ HANDLE /* shared_handle */,
+ UINT /* miplevel */,
+ cl_int * /* errcode_ret */);
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromD3D9CubeTextureKHR(
+ cl_context /* context */,
+ cl_mem_flags /* flags */,
+ IDirect3DCubeTexture9 * /* resource */,
+ HANDLE /* shared_handle */,
+ D3DCUBEMAP_FACES Facetype /* face */,
+ UINT /* miplevel */,
+ cl_int * /* errcode_ret */);
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueAcquireD3D9ObjectsKHR(
+ cl_command_queue /* command_queue */,
+ cl_uint /* num_objects */,
+ const cl_mem * /* mem_objects */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReleaseD3D9ObjectsKHR(
+ cl_command_queue /* command_queue */,
+ cl_uint /* num_objects */,
+ const cl_mem * /* mem_objects */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __OPENCL_CL_D3D9_H */
+
diff --git a/include/CL/cl_ext.h b/include/CL/cl_ext.h
new file mode 100644
index 0000000..bbbce6c
--- /dev/null
+++ b/include/CL/cl_ext.h
@@ -0,0 +1,209 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */
+
+/* cl_ext.h contains OpenCL extensions which don't have external */
+/* (OpenGL, D3D) dependencies. */
+
+#ifndef __CL_EXT_H
+#define __CL_EXT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __APPLE__
+ #include <OpenCL/cl.h>
+ #include <AvailabilityMacros.h>
+#else
+ #include <CL/cl.h>
+#endif
+
+/* cl_khr_fp64 extension - no extension #define since it has no functions */
+#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
+
+/* cl_khr_fp16 extension - no extension #define since it has no functions */
+#define CL_DEVICE_HALF_FP_CONFIG 0x1033
+
+/* Memory object destruction
+ *
+ * Apple extension for managing externally allocated buffers used with cl_mem objects created with CL_MEM_USE_HOST_PTR
+ *
+ * Registers a user callback function that will be called when the memory object is deleted and its resources
+ * freed. Each call to clSetMemObjectDestructorAPPLE registers the specified user callback function on a callback
+ * stack associated with memobj. The registered user callback functions are called in the reverse order in
+ * which they were registered. The user callback functions are called and then the memory object is deleted
+ * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
+ * notified when the memory referenced by host_ptr, specified when the memory object is created and used as
+ * the storage bits for the memory object, can be reused or freed.
+ *
+ * The application may not call CL APIs with the cl_mem object passed to the pfn_notify callback.
+ *
+ * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
+ * before using.
+ */
+#define cl_APPLE_SetMemObjectDestructor 1
+cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */,
+ void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
+ void * /*user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
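A minimal sketch of the mechanism described above, assuming the cl_APPLE_SetMemObjectDestructor extension is actually reported by the implementation; the callback frees a caller-owned host_ptr once the wrapping buffer is destroyed:

    #include <CL/cl.h>
    #include <CL/cl_ext.h>
    #include <stdlib.h>

    /* Callback invoked once the last reference to the buffer goes away. */
    static void release_host_ptr(cl_mem memobj, void *user_data)
    {
        (void) memobj;
        free(user_data);            /* user_data is the host allocation */
    }

    /* err must point to a valid cl_int. */
    cl_mem create_wrapped_buffer(cl_context ctx, size_t size, cl_int *err)
    {
        void  *host_ptr = malloc(size);
        cl_mem buf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, size,
                                    host_ptr, err);
        if (*err != CL_SUCCESS) { free(host_ptr); return NULL; }
        clSetMemObjectDestructorAPPLE(buf, release_host_ptr, host_ptr);
        return buf;
    }
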
+
+
+/* Context Logging Functions
+ *
+ * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
+ * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
+ * before using.
+ *
+ * clLogMessagesToSystemLog forwards all log messages to the Apple System Logger
+ */
+#define cl_APPLE_ContextLoggingFunctions 1
+extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */,
+ const void * /* private_info */,
+ size_t /* cb */,
+ void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
+
+/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
+extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * /* errstr */,
+ const void * /* private_info */,
+ size_t /* cb */,
+ void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
+
+/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
+extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * /* errstr */,
+ const void * /* private_info */,
+ size_t /* cb */,
+ void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
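Illustrative sketch only, assuming cl_APPLE_ContextLoggingFunctions is available: one of the loggers above is passed directly as the pfn_notify argument of clCreateContext so that runtime diagnostics land on stderr.

    #include <CL/cl.h>
    #include <CL/cl_ext.h>

    /* Sketch: clLogMessagesToStderrAPPLE matches the pfn_notify signature of
     * clCreateContext, so it can be installed directly as the error logger. */
    cl_context create_logging_context(cl_platform_id platform,
                                      cl_device_id device, cl_int *err)
    {
        cl_context_properties props[] = {
            CL_CONTEXT_PLATFORM, (cl_context_properties) platform, 0
        };
        return clCreateContext(props, 1, &device,
                               clLogMessagesToStderrAPPLE, NULL, err);
    }
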
+
+
+/************************
+* cl_khr_icd extension *
+************************/
+#define cl_khr_icd 1
+
+/* cl_platform_info */
+#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920
+
+/* Additional Error Codes */
+#define CL_PLATFORM_NOT_FOUND_KHR -1001
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clIcdGetPlatformIDsKHR(cl_uint /* num_entries */,
+ cl_platform_id * /* platforms */,
+ cl_uint * /* num_platforms */);
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
+ cl_uint /* num_entries */,
+ cl_platform_id * /* platforms */,
+ cl_uint * /* num_platforms */);
+
+
+/******************************************
+* cl_nv_device_attribute_query extension *
+******************************************/
+/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
+#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
+#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
+#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
+#define CL_DEVICE_WARP_SIZE_NV 0x4003
+#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
+#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
+#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
+
+
+/*********************************
+* cl_amd_device_attribute_query *
+*********************************/
+#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
+
+
+#ifdef CL_VERSION_1_1
+ /***********************************
+ * cl_ext_device_fission extension *
+ ***********************************/
+ #define cl_ext_device_fission 1
+
+ extern CL_API_ENTRY cl_int CL_API_CALL
+ clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+
+ typedef CL_API_ENTRY cl_int
+ (CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+
+ extern CL_API_ENTRY cl_int CL_API_CALL
+ clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+
+ typedef CL_API_ENTRY cl_int
+ (CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+
+ typedef cl_ulong cl_device_partition_property_ext;
+ extern CL_API_ENTRY cl_int CL_API_CALL
+ clCreateSubDevicesEXT( cl_device_id /*in_device*/,
+ const cl_device_partition_property_ext * /* properties */,
+ cl_uint /*num_entries*/,
+ cl_device_id * /*out_devices*/,
+ cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+
+ typedef CL_API_ENTRY cl_int
+ ( CL_API_CALL * clCreateSubDevicesEXT_fn)( cl_device_id /*in_device*/,
+ const cl_device_partition_property_ext * /* properties */,
+ cl_uint /*num_entries*/,
+ cl_device_id * /*out_devices*/,
+ cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+
+ /* cl_device_partition_property_ext */
+ #define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
+ #define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
+ #define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
+ #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053
+
+ /* clDeviceGetInfo selectors */
+ #define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
+ #define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
+ #define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
+ #define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
+ #define CL_DEVICE_PARTITION_STYLE_EXT 0x4058
+
+ /* error codes */
+ #define CL_DEVICE_PARTITION_FAILED_EXT -1057
+ #define CL_INVALID_PARTITION_COUNT_EXT -1058
+ #define CL_INVALID_PARTITION_NAME_EXT -1059
+
+ /* CL_AFFINITY_DOMAINs */
+ #define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
+ #define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
+ #define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
+ #define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
+ #define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
+ #define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100
+
+ /* cl_device_partition_property_ext list terminators */
+ #define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0)
+ #define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0)
+ #define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1)
+
+
+
+#endif /* CL_VERSION_1_1 */
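A sketch of the property-list convention used by cl_ext_device_fission, assuming the platform exports the extension; the partition description is a list terminated by CL_PROPERTIES_LIST_END_EXT:

    #include <CL/cl.h>
    #include <CL/cl_ext.h>
    #include <stdio.h>

    /* Sketch: split a device into sub-devices of two compute units each. */
    int split_device(cl_device_id device)
    {
        clCreateSubDevicesEXT_fn create_sub_devices =
            (clCreateSubDevicesEXT_fn)
                clGetExtensionFunctionAddress("clCreateSubDevicesEXT");
        if (create_sub_devices == NULL)
            return -1;                      /* extension not available */

        /* Property list: partition equally, 2 compute units per sub-device,
         * terminated by CL_PROPERTIES_LIST_END_EXT. */
        const cl_device_partition_property_ext props[] = {
            CL_DEVICE_PARTITION_EQUALLY_EXT, 2, CL_PROPERTIES_LIST_END_EXT
        };

        cl_device_id sub_devices[16];
        cl_uint      num_sub_devices = 0;
        if (create_sub_devices(device, props, 16,
                               sub_devices, &num_sub_devices) != CL_SUCCESS)
            return -1;
        printf("created %u sub-device(s)\n", (unsigned) num_sub_devices);
        return 0;
    }
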
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* __CL_EXT_H */
diff --git a/include/CL/cl_gl.h b/include/CL/cl_gl.h
new file mode 100644
index 0000000..e2e536e
--- /dev/null
+++ b/include/CL/cl_gl.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
+
+/*
+ * cl_gl.h contains Khronos-approved (KHR) OpenCL extensions which have
+ * OpenGL dependencies. The application is responsible for #including
+ * OpenGL or OpenGL ES headers before #including cl_gl.h.
+ */
+
+#ifndef __OPENCL_CL_GL_H
+#define __OPENCL_CL_GL_H
+
+#ifdef __APPLE__
+#include <OpenCL/cl.h>
+#include <OpenGL/CGLDevice.h>
+#else
+#include <CL/cl.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef cl_uint cl_gl_object_type;
+typedef cl_uint cl_gl_texture_info;
+typedef cl_uint cl_gl_platform_info;
+typedef struct __GLsync *cl_GLsync;
+
+/* cl_gl_object_type */
+#define CL_GL_OBJECT_BUFFER 0x2000
+#define CL_GL_OBJECT_TEXTURE2D 0x2001
+#define CL_GL_OBJECT_TEXTURE3D 0x2002
+#define CL_GL_OBJECT_RENDERBUFFER 0x2003
+
+/* cl_gl_texture_info */
+#define CL_GL_TEXTURE_TARGET 0x2004
+#define CL_GL_MIPMAP_LEVEL 0x2005
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLBuffer(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ cl_GLuint /* bufobj */,
+ int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLTexture2D(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ cl_GLenum /* target */,
+ cl_GLint /* miplevel */,
+ cl_GLuint /* texture */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLTexture3D(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ cl_GLenum /* target */,
+ cl_GLint /* miplevel */,
+ cl_GLuint /* texture */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLRenderbuffer(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ cl_GLuint /* renderbuffer */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetGLObjectInfo(cl_mem /* memobj */,
+ cl_gl_object_type * /* gl_object_type */,
+ cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetGLTextureInfo(cl_mem /* memobj */,
+ cl_gl_texture_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */,
+ cl_uint /* num_objects */,
+ const cl_mem * /* mem_objects */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */,
+ cl_uint /* num_objects */,
+ const cl_mem * /* mem_objects */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
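The acquire/release pair above must bracket any OpenCL use of a shared GL object. A minimal sketch, assuming the GL buffer has already been wrapped with clCreateFromGLBuffer and prior GL work on it has completed:

    #include <CL/cl.h>
    #include <CL/cl_gl.h>

    /* Sketch: a GL-backed cl_mem may only be used between acquire and release. */
    cl_int run_on_gl_buffer(cl_command_queue queue, cl_kernel kernel,
                            cl_mem gl_mem, size_t global_size)
    {
        cl_int err = clEnqueueAcquireGLObjects(queue, 1, &gl_mem, 0, NULL, NULL);
        if (err != CL_SUCCESS) return err;

        err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &gl_mem);
        if (err == CL_SUCCESS)
            err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
                                         &global_size, NULL, 0, NULL, NULL);

        /* Release unconditionally so GL can use the buffer again. */
        cl_int rel = clEnqueueReleaseGLObjects(queue, 1, &gl_mem, 0, NULL, NULL);
        clFinish(queue);
        return err != CL_SUCCESS ? err : rel;
    }
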
+
+/* cl_khr_gl_sharing extension */
+
+#define cl_khr_gl_sharing 1
+
+typedef cl_uint cl_gl_context_info;
+
+/* Additional Error Codes */
+#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
+
+/* cl_gl_context_info */
+#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
+#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
+
+/* Additional cl_context_properties */
+#define CL_GL_CONTEXT_KHR 0x2008
+#define CL_EGL_DISPLAY_KHR 0x2009
+#define CL_GLX_DISPLAY_KHR 0x200A
+#define CL_WGL_HDC_KHR 0x200B
+#define CL_CGL_SHAREGROUP_KHR 0x200C
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
+ cl_gl_context_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
+ const cl_context_properties * properties,
+ cl_gl_context_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret);
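To illustrate the context-properties route into GL sharing (a sketch assuming an active GLX context, so glXGetCurrentContext and glXGetCurrentDisplay are meaningful), the current GL context and display are passed through CL_GL_CONTEXT_KHR and CL_GLX_DISPLAY_KHR, and clGetGLContextInfoKHR reports the CL device driving that context:

    #include <GL/glx.h>      /* cl_gl.h expects GL headers to be included first */
    #include <CL/cl.h>
    #include <CL/cl_gl.h>

    /* Sketch: ask which CL device drives the GL context current on this thread. */
    cl_device_id device_for_current_gl_context(cl_platform_id platform)
    {
        cl_context_properties props[] = {
            CL_GL_CONTEXT_KHR,   (cl_context_properties) glXGetCurrentContext(),
            CL_GLX_DISPLAY_KHR,  (cl_context_properties) glXGetCurrentDisplay(),
            CL_CONTEXT_PLATFORM, (cl_context_properties) platform,
            0
        };
        cl_device_id device = NULL;
        size_t       size_ret = 0;
        if (clGetGLContextInfoKHR(props, CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,
                                  sizeof(device), &device, &size_ret) != CL_SUCCESS)
            return NULL;
        return device;
    }
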
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __OPENCL_CL_GL_H */
diff --git a/include/CL/cl_intel.h b/include/CL/cl_intel.h
new file mode 100644
index 0000000..680f948
--- /dev/null
+++ b/include/CL/cl_intel.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __OPENCL_CL_INTEL_H
+#define __OPENCL_CL_INTEL_H
+
+#include "CL/cl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define CL_MEM_PINNABLE (1 << 10)
+
+/* Track allocations and report current number of unfreed allocations */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clIntelReportUnfreed(void);
+
+/* 1 to 1 mapping of drm_intel_bo_map */
+extern CL_API_ENTRY void* CL_API_CALL
+clIntelMapBuffer(cl_mem, cl_int*);
+
+/* 1 to 1 mapping of drm_intel_bo_unmap */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clIntelUnmapBuffer(cl_mem);
+
+/* Pin/unpin the buffer in GPU memory (must be root) */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clIntelPinBuffer(cl_mem);
+extern CL_API_ENTRY cl_int CL_API_CALL
+clIntelUnpinBuffer(cl_mem);
+
+/* Get the generation of the Gen device (used to load the proper binary) */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clIntelGetGenVersion(cl_device_id device, cl_int *ver);
+
+/* Create a program from an LLVM source file */
+extern CL_API_ENTRY cl_program CL_API_CALL
+clCreateProgramWithLLVM(cl_context /* context */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* device_list */,
+ const char * /* file */,
+ cl_int * /* errcode_ret */);
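A hedged sketch of the beignet-specific entry point declared above; the file path used here is purely a placeholder:

    #include <CL/cl.h>
    #include <CL/cl_intel.h>

    /* Sketch: "kernel.llvm" is a placeholder path to an LLVM file on disk. */
    cl_program load_llvm_program(cl_context ctx, cl_device_id device, cl_int *err)
    {
        return clCreateProgramWithLLVM(ctx, 1, &device, "kernel.llvm", err);
    }
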
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __OPENCL_CL_INTEL_H */
+
diff --git a/include/CL/cl_platform.h b/include/CL/cl_platform.h
new file mode 100644
index 0000000..3d87457
--- /dev/null
+++ b/include/CL/cl_platform.h
@@ -0,0 +1,1194 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/* $Revision: 11803 $ on $Date: 2010-06-25 10:02:12 -0700 (Fri, 25 Jun 2010) $ */
+
+#ifndef __CL_PLATFORM_H
+#define __CL_PLATFORM_H
+
+#ifdef __APPLE__
+ /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */
+ #include <AvailabilityMacros.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(_WIN32)
+ #define CL_API_ENTRY
+ #define CL_API_CALL __stdcall
+ #define CL_CALLBACK __stdcall
+#else
+ #define CL_API_ENTRY
+ #define CL_API_CALL
+ #define CL_CALLBACK
+#endif
+
+#ifdef __APPLE__
+ #define CL_EXTENSION_WEAK_LINK __attribute__((weak_import))
+ #define CL_API_SUFFIX__VERSION_1_0 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+ #define CL_EXT_SUFFIX__VERSION_1_0 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+ #define CL_API_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK
+ #define CL_EXT_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK
+ #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+#else
+ #define CL_EXTENSION_WEAK_LINK
+ #define CL_API_SUFFIX__VERSION_1_0
+ #define CL_EXT_SUFFIX__VERSION_1_0
+ #define CL_API_SUFFIX__VERSION_1_1
+ #define CL_EXT_SUFFIX__VERSION_1_1
+ #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
+#endif
+
+#if (defined (_WIN32) && defined(_MSC_VER))
+
+/* scalar types */
+typedef signed __int8 cl_char;
+typedef unsigned __int8 cl_uchar;
+typedef signed __int16 cl_short;
+typedef unsigned __int16 cl_ushort;
+typedef signed __int32 cl_int;
+typedef unsigned __int32 cl_uint;
+typedef signed __int64 cl_long;
+typedef unsigned __int64 cl_ulong;
+
+typedef unsigned __int16 cl_half;
+typedef float cl_float;
+typedef double cl_double;
+
+/* Macro names and corresponding values defined by OpenCL */
+#define CL_CHAR_BIT 8
+#define CL_SCHAR_MAX 127
+#define CL_SCHAR_MIN (-127-1)
+#define CL_CHAR_MAX CL_SCHAR_MAX
+#define CL_CHAR_MIN CL_SCHAR_MIN
+#define CL_UCHAR_MAX 255
+#define CL_SHRT_MAX 32767
+#define CL_SHRT_MIN (-32767-1)
+#define CL_USHRT_MAX 65535
+#define CL_INT_MAX 2147483647
+#define CL_INT_MIN (-2147483647-1)
+#define CL_UINT_MAX 0xffffffffU
+#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
+#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
+#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
+
+#define CL_FLT_DIG 6
+#define CL_FLT_MANT_DIG 24
+#define CL_FLT_MAX_10_EXP +38
+#define CL_FLT_MAX_EXP +128
+#define CL_FLT_MIN_10_EXP -37
+#define CL_FLT_MIN_EXP -125
+#define CL_FLT_RADIX 2
+#define CL_FLT_MAX 340282346638528859811704183484516925440.0f
+#define CL_FLT_MIN 1.175494350822287507969e-38f
+#define CL_FLT_EPSILON MAKE_HEX_DOUBLE(0x1.0p-23f, 0x1L, -23)
+
+#define CL_DBL_DIG 15
+#define CL_DBL_MANT_DIG 53
+#define CL_DBL_MAX_10_EXP +308
+#define CL_DBL_MAX_EXP +1024
+#define CL_DBL_MIN_10_EXP -307
+#define CL_DBL_MIN_EXP -1021
+#define CL_DBL_RADIX 2
+#define CL_DBL_MAX 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0
+#define CL_DBL_MIN 2.225073858507201383090e-308
+#define CL_DBL_EPSILON 2.220446049250313080847e-16
+
+#define CL_M_E 2.718281828459045090796
+#define CL_M_LOG2E 1.442695040888963387005
+#define CL_M_LOG10E 0.434294481903251816668
+#define CL_M_LN2 0.693147180559945286227
+#define CL_M_LN10 2.302585092994045901094
+#define CL_M_PI 3.141592653589793115998
+#define CL_M_PI_2 1.570796326794896557999
+#define CL_M_PI_4 0.785398163397448278999
+#define CL_M_1_PI 0.318309886183790691216
+#define CL_M_2_PI 0.636619772367581382433
+#define CL_M_2_SQRTPI 1.128379167095512558561
+#define CL_M_SQRT2 1.414213562373095145475
+#define CL_M_SQRT1_2 0.707106781186547572737
+
+#define CL_M_E_F 2.71828174591064f
+#define CL_M_LOG2E_F 1.44269502162933f
+#define CL_M_LOG10E_F 0.43429449200630f
+#define CL_M_LN2_F 0.69314718246460f
+#define CL_M_LN10_F 2.30258512496948f
+#define CL_M_PI_F 3.14159274101257f
+#define CL_M_PI_2_F 1.57079637050629f
+#define CL_M_PI_4_F 0.78539818525314f
+#define CL_M_1_PI_F 0.31830987334251f
+#define CL_M_2_PI_F 0.63661974668503f
+#define CL_M_2_SQRTPI_F 1.12837922573090f
+#define CL_M_SQRT2_F 1.41421353816986f
+#define CL_M_SQRT1_2_F 0.70710676908493f
+
+#define CL_NAN (CL_INFINITY - CL_INFINITY)
+#define CL_HUGE_VALF ((cl_float) 1e50)
+#define CL_HUGE_VAL ((cl_double) 1e500)
+#define CL_MAXFLOAT CL_FLT_MAX
+#define CL_INFINITY CL_HUGE_VALF
+
+#else
+
+#include <stdint.h>
+
+/* scalar types */
+typedef int8_t cl_char;
+typedef uint8_t cl_uchar;
+typedef int16_t cl_short __attribute__((aligned(2)));
+typedef uint16_t cl_ushort __attribute__((aligned(2)));
+typedef int32_t cl_int __attribute__((aligned(4)));
+typedef uint32_t cl_uint __attribute__((aligned(4)));
+typedef int64_t cl_long __attribute__((aligned(8)));
+typedef uint64_t cl_ulong __attribute__((aligned(8)));
+
+typedef uint16_t cl_half __attribute__((aligned(2)));
+typedef float cl_float __attribute__((aligned(4)));
+typedef double cl_double __attribute__((aligned(8)));
+
+/* Macro names and corresponding values defined by OpenCL */
+#define CL_CHAR_BIT 8
+#define CL_SCHAR_MAX 127
+#define CL_SCHAR_MIN (-127-1)
+#define CL_CHAR_MAX CL_SCHAR_MAX
+#define CL_CHAR_MIN CL_SCHAR_MIN
+#define CL_UCHAR_MAX 255
+#define CL_SHRT_MAX 32767
+#define CL_SHRT_MIN (-32767-1)
+#define CL_USHRT_MAX 65535
+#define CL_INT_MAX 2147483647
+#define CL_INT_MIN (-2147483647-1)
+#define CL_UINT_MAX 0xffffffffU
+#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
+#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
+#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
+
+#define CL_FLT_DIG 6
+#define CL_FLT_MANT_DIG 24
+#define CL_FLT_MAX_10_EXP +38
+#define CL_FLT_MAX_EXP +128
+#define CL_FLT_MIN_10_EXP -37
+#define CL_FLT_MIN_EXP -125
+#define CL_FLT_RADIX 2
+#define CL_FLT_MAX 0x1.fffffep127f
+#define CL_FLT_MIN 0x1.0p-126f
+#define CL_FLT_EPSILON 0x1.0p-23f
+
+#define CL_DBL_DIG 15
+#define CL_DBL_MANT_DIG 53
+#define CL_DBL_MAX_10_EXP +308
+#define CL_DBL_MAX_EXP +1024
+#define CL_DBL_MIN_10_EXP -307
+#define CL_DBL_MIN_EXP -1021
+#define CL_DBL_RADIX 2
+#define CL_DBL_MAX 0x1.fffffffffffffp1023
+#define CL_DBL_MIN 0x1.0p-1022
+#define CL_DBL_EPSILON 0x1.0p-52
+
+#define CL_M_E 2.718281828459045090796
+#define CL_M_LOG2E 1.442695040888963387005
+#define CL_M_LOG10E 0.434294481903251816668
+#define CL_M_LN2 0.693147180559945286227
+#define CL_M_LN10 2.302585092994045901094
+#define CL_M_PI 3.141592653589793115998
+#define CL_M_PI_2 1.570796326794896557999
+#define CL_M_PI_4 0.785398163397448278999
+#define CL_M_1_PI 0.318309886183790691216
+#define CL_M_2_PI 0.636619772367581382433
+#define CL_M_2_SQRTPI 1.128379167095512558561
+#define CL_M_SQRT2 1.414213562373095145475
+#define CL_M_SQRT1_2 0.707106781186547572737
+
+#define CL_M_E_F 2.71828174591064f
+#define CL_M_LOG2E_F 1.44269502162933f
+#define CL_M_LOG10E_F 0.43429449200630f
+#define CL_M_LN2_F 0.69314718246460f
+#define CL_M_LN10_F 2.30258512496948f
+#define CL_M_PI_F 3.14159274101257f
+#define CL_M_PI_2_F 1.57079637050629f
+#define CL_M_PI_4_F 0.78539818525314f
+#define CL_M_1_PI_F 0.31830987334251f
+#define CL_M_2_PI_F 0.63661974668503f
+#define CL_M_2_SQRTPI_F 1.12837922573090f
+#define CL_M_SQRT2_F 1.41421353816986f
+#define CL_M_SQRT1_2_F 0.70710676908493f
+
+#if defined( __GNUC__ )
+ #define CL_HUGE_VALF __builtin_huge_valf()
+ #define CL_HUGE_VAL __builtin_huge_val()
+ #define CL_NAN __builtin_nanf( "" )
+#else
+ #define CL_HUGE_VALF ((cl_float) 1e50)
+ #define CL_HUGE_VAL ((cl_double) 1e500)
+ float nanf( const char * );
+ #define CL_NAN nanf( "" )
+#endif
+#define CL_MAXFLOAT CL_FLT_MAX
+#define CL_INFINITY CL_HUGE_VALF
+
+#endif
+
+#include <stddef.h>
+
+/* Mirror types to GL types. Mirror types allow us to avoid deciding which headers to load based on whether we are using GL or GLES here. */
+typedef unsigned int cl_GLuint;
+typedef int cl_GLint;
+typedef unsigned int cl_GLenum;
+
+/*
+ * Vector types
+ *
+ * Note: OpenCL requires that all types be naturally aligned.
+ * This means that vector types must be naturally aligned.
+ * For example, a vector of four floats must be aligned to
+ * a 16 byte boundary (calculated as 4 * the natural 4-byte
+ * alignment of the float). The alignment qualifiers here
+ * will only function properly if your compiler supports them
+ * and if you don't actively work to defeat them. For example,
+ * in order for a cl_float4 to be 16 byte aligned in a struct,
+ * the start of the struct must itself be 16-byte aligned.
+ *
+ * Maintaining proper alignment is the user's responsibility.
+ */
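A small illustration of the note above (not part of the header): a struct containing a cl_float4 inherits the 16-byte alignment requirement, which the sizes printed below make visible:

    #include <CL/cl_platform.h>
    #include <stdio.h>

    /* A cl_float4 member forces 16-byte alignment on the enclosing struct
     * (with compilers that honour the CL_ALIGNED attribute). */
    struct particle {
        cl_float4 position;
        cl_float  mass;       /* the struct is padded to a 16-byte multiple */
    };

    int main(void)
    {
        cl_float4 v = {{ 1.0f, 2.0f, 3.0f, 4.0f }};   /* initialises v.s[] */
        printf("sizeof(cl_float4) = %zu, sizeof(struct particle) = %zu\n",
               sizeof(cl_float4), sizeof(struct particle));
        printf("v.s[2] = %f\n", v.s[2]);
        return 0;
    }
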
+
+/* Define basic vector types */
+#if defined( __VEC__ )
+ #include <altivec.h> /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */
+ typedef vector unsigned char __cl_uchar16;
+ typedef vector signed char __cl_char16;
+ typedef vector unsigned short __cl_ushort8;
+ typedef vector signed short __cl_short8;
+ typedef vector unsigned int __cl_uint4;
+ typedef vector signed int __cl_int4;
+ typedef vector float __cl_float4;
+ #define __CL_UCHAR16__ 1
+ #define __CL_CHAR16__ 1
+ #define __CL_USHORT8__ 1
+ #define __CL_SHORT8__ 1
+ #define __CL_UINT4__ 1
+ #define __CL_INT4__ 1
+ #define __CL_FLOAT4__ 1
+#endif
+
+#if defined( __SSE__ )
+ #if defined( __MINGW64__ )
+ #include <intrin.h>
+ #else
+ #include <xmmintrin.h>
+ #endif
+ #if defined( __GNUC__ )
+ typedef float __cl_float4 __attribute__((vector_size(16)));
+ #else
+ typedef __m128 __cl_float4;
+ #endif
+ #define __CL_FLOAT4__ 1
+#endif
+
+#if defined( __SSE2__ )
+ #if defined( __MINGW64__ )
+ #include <intrin.h>
+ #else
+ #include <emmintrin.h>
+ #endif
+ #if defined( __GNUC__ )
+ typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16)));
+ typedef cl_char __cl_char16 __attribute__((vector_size(16)));
+ typedef cl_ushort __cl_ushort8 __attribute__((vector_size(16)));
+ typedef cl_short __cl_short8 __attribute__((vector_size(16)));
+ typedef cl_uint __cl_uint4 __attribute__((vector_size(16)));
+ typedef cl_int __cl_int4 __attribute__((vector_size(16)));
+ typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16)));
+ typedef cl_long __cl_long2 __attribute__((vector_size(16)));
+ typedef cl_double __cl_double2 __attribute__((vector_size(16)));
+ #else
+ typedef __m128i __cl_uchar16;
+ typedef __m128i __cl_char16;
+ typedef __m128i __cl_ushort8;
+ typedef __m128i __cl_short8;
+ typedef __m128i __cl_uint4;
+ typedef __m128i __cl_int4;
+ typedef __m128i __cl_ulong2;
+ typedef __m128i __cl_long2;
+ typedef __m128d __cl_double2;
+ #endif
+ #define __CL_UCHAR16__ 1
+ #define __CL_CHAR16__ 1
+ #define __CL_USHORT8__ 1
+ #define __CL_SHORT8__ 1
+ #define __CL_INT4__ 1
+ #define __CL_UINT4__ 1
+ #define __CL_ULONG2__ 1
+ #define __CL_LONG2__ 1
+ #define __CL_DOUBLE2__ 1
+#endif
+
+#if defined( __MMX__ )
+ #include <mmintrin.h>
+ #if defined( __GNUC__ )
+ typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8)));
+ typedef cl_char __cl_char8 __attribute__((vector_size(8)));
+ typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8)));
+ typedef cl_short __cl_short4 __attribute__((vector_size(8)));
+ typedef cl_uint __cl_uint2 __attribute__((vector_size(8)));
+ typedef cl_int __cl_int2 __attribute__((vector_size(8)));
+ typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8)));
+ typedef cl_long __cl_long1 __attribute__((vector_size(8)));
+ typedef cl_float __cl_float2 __attribute__((vector_size(8)));
+ #else
+ typedef __m64 __cl_uchar8;
+ typedef __m64 __cl_char8;
+ typedef __m64 __cl_ushort4;
+ typedef __m64 __cl_short4;
+ typedef __m64 __cl_uint2;
+ typedef __m64 __cl_int2;
+ typedef __m64 __cl_ulong1;
+ typedef __m64 __cl_long1;
+ typedef __m64 __cl_float2;
+ #endif
+ #define __CL_UCHAR8__ 1
+ #define __CL_CHAR8__ 1
+ #define __CL_USHORT4__ 1
+ #define __CL_SHORT4__ 1
+ #define __CL_INT2__ 1
+ #define __CL_UINT2__ 1
+ #define __CL_ULONG1__ 1
+ #define __CL_LONG1__ 1
+ #define __CL_FLOAT2__ 1
+#endif
+
+#if defined( __AVX__ )
+ #if defined( __MINGW64__ )
+ #include <intrin.h>
+ #else
+ #include <immintrin.h>
+ #endif
+ #if defined( __GNUC__ )
+ typedef cl_float __cl_float8 __attribute__((vector_size(32)));
+ typedef cl_double __cl_double4 __attribute__((vector_size(32)));
+ #else
+ typedef __m256 __cl_float8;
+ typedef __m256d __cl_double4;
+ #endif
+ #define __CL_FLOAT8__ 1
+ #define __CL_DOUBLE4__ 1
+#endif
+
+/* Define alignment keys */
+#if defined( __GNUC__ )
+ #define CL_ALIGNED(_x) __attribute__ ((aligned(_x)))
+#elif defined( _WIN32) && (_MSC_VER)
+ /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */
+ /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */
+ /* #include <crtdefs.h> */
+ /* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */
+ #define CL_ALIGNED(_x)
+#else
+ #warning Need to implement some method to align data here
+ #define CL_ALIGNED(_x)
+#endif
+
+/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ /* .xyzw and .s0123...{f|F} are supported */
+ #define CL_HAS_NAMED_VECTOR_FIELDS 1
+ /* .hi and .lo are supported */
+ #define CL_HAS_HI_LO_VECTOR_FIELDS 1
+#endif
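A hedged sketch of how these feature macros are typically consumed: use the named components when the header advertises them, otherwise fall back to the always-present .s[] array.

    #include <CL/cl_platform.h>

    /* Use named vector components only when the header advertises them. */
    cl_float sum2(cl_float2 v)
    {
    #if defined(CL_HAS_NAMED_VECTOR_FIELDS)
        return v.x + v.y;
    #else
        return v.s[0] + v.s[1];
    #endif
    }
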
+
+/* Define cl_vector types */
+
+/* ---- cl_charn ---- */
+typedef union
+{
+ cl_char CL_ALIGNED(2) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_char x, y; };
+ __extension__ struct{ cl_char s0, s1; };
+ __extension__ struct{ cl_char lo, hi; };
+#endif
+#if defined( __CL_CHAR2__)
+ __cl_char2 v2;
+#endif
+}cl_char2;
+
+typedef union
+{
+ cl_char CL_ALIGNED(4) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_char x, y, z, w; };
+ __extension__ struct{ cl_char s0, s1, s2, s3; };
+ __extension__ struct{ cl_char2 lo, hi; };
+#endif
+#if defined( __CL_CHAR2__)
+ __cl_char2 v2[2];
+#endif
+#if defined( __CL_CHAR4__)
+ __cl_char4 v4;
+#endif
+}cl_char4;
+
+/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */
+typedef cl_char4 cl_char3;
+
+typedef union
+{
+ cl_char CL_ALIGNED(8) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_char x, y, z, w; };
+ __extension__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7; };
+ __extension__ struct{ cl_char4 lo, hi; };
+#endif
+#if defined( __CL_CHAR2__)
+ __cl_char2 v2[4];
+#endif
+#if defined( __CL_CHAR4__)
+ __cl_char4 v4[2];
+#endif
+#if defined( __CL_CHAR8__ )
+ __cl_char8 v8;
+#endif
+}cl_char8;
+
+typedef union
+{
+ cl_char CL_ALIGNED(16) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __extension__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __extension__ struct{ cl_char8 lo, hi; };
+#endif
+#if defined( __CL_CHAR2__)
+ __cl_char2 v2[8];
+#endif
+#if defined( __CL_CHAR4__)
+ __cl_char4 v4[4];
+#endif
+#if defined( __CL_CHAR8__ )
+ __cl_char8 v8[2];
+#endif
+#if defined( __CL_CHAR16__ )
+ __cl_char16 v16;
+#endif
+}cl_char16;
+
+
+/* ---- cl_ucharn ---- */
+typedef union
+{
+ cl_uchar CL_ALIGNED(2) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_uchar x, y; };
+ __extension__ struct{ cl_uchar s0, s1; };
+ __extension__ struct{ cl_uchar lo, hi; };
+#endif
+#if defined( __cl_uchar2__)
+ __cl_uchar2 v2;
+#endif
+}cl_uchar2;
+
+typedef union
+{
+ cl_uchar CL_ALIGNED(4) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_uchar x, y, z, w; };
+ __extension__ struct{ cl_uchar s0, s1, s2, s3; };
+ __extension__ struct{ cl_uchar2 lo, hi; };
+#endif
+#if defined( __CL_UCHAR2__)
+ __cl_uchar2 v2[2];
+#endif
+#if defined( __CL_UCHAR4__)
+ __cl_uchar4 v4;
+#endif
+}cl_uchar4;
+
+/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */
+typedef cl_uchar4 cl_uchar3;
+
+typedef union
+{
+ cl_uchar CL_ALIGNED(8) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_uchar x, y, z, w; };
+ __extension__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7; };
+ __extension__ struct{ cl_uchar4 lo, hi; };
+#endif
+#if defined( __CL_UCHAR2__)
+ __cl_uchar2 v2[4];
+#endif
+#if defined( __CL_UCHAR4__)
+ __cl_uchar4 v4[2];
+#endif
+#if defined( __CL_UCHAR8__ )
+ __cl_uchar8 v8;
+#endif
+}cl_uchar8;
+
+typedef union
+{
+ cl_uchar CL_ALIGNED(16) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_uchar x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __extension__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __extension__ struct{ cl_uchar8 lo, hi; };
+#endif
+#if defined( __CL_UCHAR2__)
+ __cl_uchar2 v2[8];
+#endif
+#if defined( __CL_UCHAR4__)
+ __cl_uchar4 v4[4];
+#endif
+#if defined( __CL_UCHAR8__ )
+ __cl_uchar8 v8[2];
+#endif
+#if defined( __CL_UCHAR16__ )
+ __cl_uchar16 v16;
+#endif
+}cl_uchar16;
+
+
+/* ---- cl_shortn ---- */
+typedef union
+{
+ cl_short CL_ALIGNED(4) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_short x, y; };
+ __extension__ struct{ cl_short s0, s1; };
+ __extension__ struct{ cl_short lo, hi; };
+#endif
+#if defined( __CL_SHORT2__)
+ __cl_short2 v2;
+#endif
+}cl_short2;
+
+typedef union
+{
+ cl_short CL_ALIGNED(8) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_short x, y, z, w; };
+ __extension__ struct{ cl_short s0, s1, s2, s3; };
+ __extension__ struct{ cl_short2 lo, hi; };
+#endif
+#if defined( __CL_SHORT2__)
+ __cl_short2 v2[2];
+#endif
+#if defined( __CL_SHORT4__)
+ __cl_short4 v4;
+#endif
+}cl_short4;
+
+/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */
+typedef cl_short4 cl_short3;
+
+typedef union
+{
+ cl_short CL_ALIGNED(16) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_short x, y, z, w; };
+ __extension__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7; };
+ __extension__ struct{ cl_short4 lo, hi; };
+#endif
+#if defined( __CL_SHORT2__)
+ __cl_short2 v2[4];
+#endif
+#if defined( __CL_SHORT4__)
+ __cl_short4 v4[2];
+#endif
+#if defined( __CL_SHORT8__ )
+ __cl_short8 v8;
+#endif
+}cl_short8;
+
+typedef union
+{
+ cl_short CL_ALIGNED(32) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_short x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __extension__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __extension__ struct{ cl_short8 lo, hi; };
+#endif
+#if defined( __CL_SHORT2__)
+ __cl_short2 v2[8];
+#endif
+#if defined( __CL_SHORT4__)
+ __cl_short4 v4[4];
+#endif
+#if defined( __CL_SHORT8__ )
+ __cl_short8 v8[2];
+#endif
+#if defined( __CL_SHORT16__ )
+ __cl_short16 v16;
+#endif
+}cl_short16;
+
+
+/* ---- cl_ushortn ---- */
+typedef union
+{
+ cl_ushort CL_ALIGNED(4) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_ushort x, y; };
+ __extension__ struct{ cl_ushort s0, s1; };
+ __extension__ struct{ cl_ushort lo, hi; };
+#endif
+#if defined( __CL_USHORT2__)
+ __cl_ushort2 v2;
+#endif
+}cl_ushort2;
+
+typedef union
+{
+ cl_ushort CL_ALIGNED(8) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_ushort x, y, z, w; };
+ __extension__ struct{ cl_ushort s0, s1, s2, s3; };
+ __extension__ struct{ cl_ushort2 lo, hi; };
+#endif
+#if defined( __CL_USHORT2__)
+ __cl_ushort2 v2[2];
+#endif
+#if defined( __CL_USHORT4__)
+ __cl_ushort4 v4;
+#endif
+}cl_ushort4;
+
+/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */
+typedef cl_ushort4 cl_ushort3;
+
+typedef union
+{
+ cl_ushort CL_ALIGNED(16) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_ushort x, y, z, w; };
+ __extension__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7; };
+ __extension__ struct{ cl_ushort4 lo, hi; };
+#endif
+#if defined( __CL_USHORT2__)
+ __cl_ushort2 v2[4];
+#endif
+#if defined( __CL_USHORT4__)
+ __cl_ushort4 v4[2];
+#endif
+#if defined( __CL_USHORT8__ )
+ __cl_ushort8 v8;
+#endif
+}cl_ushort8;
+
+typedef union
+{
+ cl_ushort CL_ALIGNED(32) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_ushort x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __extension__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __extension__ struct{ cl_ushort8 lo, hi; };
+#endif
+#if defined( __CL_USHORT2__)
+ __cl_ushort2 v2[8];
+#endif
+#if defined( __CL_USHORT4__)
+ __cl_ushort4 v4[4];
+#endif
+#if defined( __CL_USHORT8__ )
+ __cl_ushort8 v8[2];
+#endif
+#if defined( __CL_USHORT16__ )
+ __cl_ushort16 v16;
+#endif
+}cl_ushort16;
+
+/* ---- cl_intn ---- */
+typedef union
+{
+ cl_int CL_ALIGNED(8) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_int x, y; };
+ __extension__ struct{ cl_int s0, s1; };
+ __extension__ struct{ cl_int lo, hi; };
+#endif
+#if defined( __CL_INT2__)
+ __cl_int2 v2;
+#endif
+}cl_int2;
+
+typedef union
+{
+ cl_int CL_ALIGNED(16) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_int x, y, z, w; };
+ __extension__ struct{ cl_int s0, s1, s2, s3; };
+ __extension__ struct{ cl_int2 lo, hi; };
+#endif
+#if defined( __CL_INT2__)
+ __cl_int2 v2[2];
+#endif
+#if defined( __CL_INT4__)
+ __cl_int4 v4;
+#endif
+}cl_int4;
+
+/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */
+typedef cl_int4 cl_int3;
+
+typedef union
+{
+ cl_int CL_ALIGNED(32) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_int x, y, z, w; };
+ __extension__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7; };
+ __extension__ struct{ cl_int4 lo, hi; };
+#endif
+#if defined( __CL_INT2__)
+ __cl_int2 v2[4];
+#endif
+#if defined( __CL_INT4__)
+ __cl_int4 v4[2];
+#endif
+#if defined( __CL_INT8__ )
+ __cl_int8 v8;
+#endif
+}cl_int8;
+
+typedef union
+{
+ cl_int CL_ALIGNED(64) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_int x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __extension__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __extension__ struct{ cl_int8 lo, hi; };
+#endif
+#if defined( __CL_INT2__)
+ __cl_int2 v2[8];
+#endif
+#if defined( __CL_INT4__)
+ __cl_int4 v4[4];
+#endif
+#if defined( __CL_INT8__ )
+ __cl_int8 v8[2];
+#endif
+#if defined( __CL_INT16__ )
+ __cl_int16 v16;
+#endif
+}cl_int16;
+
+
+/* ---- cl_uintn ---- */
+typedef union
+{
+ cl_uint CL_ALIGNED(8) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_uint x, y; };
+ __extension__ struct{ cl_uint s0, s1; };
+ __extension__ struct{ cl_uint lo, hi; };
+#endif
+#if defined( __CL_UINT2__)
+ __cl_uint2 v2;
+#endif
+}cl_uint2;
+
+typedef union
+{
+ cl_uint CL_ALIGNED(16) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_uint x, y, z, w; };
+ __extension__ struct{ cl_uint s0, s1, s2, s3; };
+ __extension__ struct{ cl_uint2 lo, hi; };
+#endif
+#if defined( __CL_UINT2__)
+ __cl_uint2 v2[2];
+#endif
+#if defined( __CL_UINT4__)
+ __cl_uint4 v4;
+#endif
+}cl_uint4;
+
+/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */
+typedef cl_uint4 cl_uint3;
+
+typedef union
+{
+ cl_uint CL_ALIGNED(32) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_uint x, y, z, w; };
+ __extension__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7; };
+ __extension__ struct{ cl_uint4 lo, hi; };
+#endif
+#if defined( __CL_UINT2__)
+ __cl_uint2 v2[4];
+#endif
+#if defined( __CL_UINT4__)
+ __cl_uint4 v4[2];
+#endif
+#if defined( __CL_UINT8__ )
+ __cl_uint8 v8;
+#endif
+}cl_uint8;
+
+typedef union
+{
+ cl_uint CL_ALIGNED(64) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_uint x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __extension__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __extension__ struct{ cl_uint8 lo, hi; };
+#endif
+#if defined( __CL_UINT2__)
+ __cl_uint2 v2[8];
+#endif
+#if defined( __CL_UINT4__)
+ __cl_uint4 v4[4];
+#endif
+#if defined( __CL_UINT8__ )
+ __cl_uint8 v8[2];
+#endif
+#if defined( __CL_UINT16__ )
+ __cl_uint16 v16;
+#endif
+}cl_uint16;
+
+/* ---- cl_longn ---- */
+typedef union
+{
+ cl_long CL_ALIGNED(16) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_long x, y; };
+ __extension__ struct{ cl_long s0, s1; };
+ __extension__ struct{ cl_long lo, hi; };
+#endif
+#if defined( __CL_LONG2__)
+ __cl_long2 v2;
+#endif
+}cl_long2;
+
+typedef union
+{
+ cl_long CL_ALIGNED(32) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_long x, y, z, w; };
+ __extension__ struct{ cl_long s0, s1, s2, s3; };
+ __extension__ struct{ cl_long2 lo, hi; };
+#endif
+#if defined( __CL_LONG2__)
+ __cl_long2 v2[2];
+#endif
+#if defined( __CL_LONG4__)
+ __cl_long4 v4;
+#endif
+}cl_long4;
+
+/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */
+typedef cl_long4 cl_long3;
+
+typedef union
+{
+ cl_long CL_ALIGNED(64) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_long x, y, z, w; };
+ __extension__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7; };
+ __extension__ struct{ cl_long4 lo, hi; };
+#endif
+#if defined( __CL_LONG2__)
+ __cl_long2 v2[4];
+#endif
+#if defined( __CL_LONG4__)
+ __cl_long4 v4[2];
+#endif
+#if defined( __CL_LONG8__ )
+ __cl_long8 v8;
+#endif
+}cl_long8;
+
+typedef union
+{
+ cl_long CL_ALIGNED(128) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_long x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __extension__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __extension__ struct{ cl_long8 lo, hi; };
+#endif
+#if defined( __CL_LONG2__)
+ __cl_long2 v2[8];
+#endif
+#if defined( __CL_LONG4__)
+ __cl_long4 v4[4];
+#endif
+#if defined( __CL_LONG8__ )
+ __cl_long8 v8[2];
+#endif
+#if defined( __CL_LONG16__ )
+ __cl_long16 v16;
+#endif
+}cl_long16;
+
+
+/* ---- cl_ulongn ---- */
+typedef union
+{
+ cl_ulong CL_ALIGNED(16) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_ulong x, y; };
+ __extension__ struct{ cl_ulong s0, s1; };
+ __extension__ struct{ cl_ulong lo, hi; };
+#endif
+#if defined( __CL_ULONG2__)
+ __cl_ulong2 v2;
+#endif
+}cl_ulong2;
+
+typedef union
+{
+ cl_ulong CL_ALIGNED(32) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_ulong x, y, z, w; };
+ __extension__ struct{ cl_ulong s0, s1, s2, s3; };
+ __extension__ struct{ cl_ulong2 lo, hi; };
+#endif
+#if defined( __CL_ULONG2__)
+ __cl_ulong2 v2[2];
+#endif
+#if defined( __CL_ULONG4__)
+ __cl_ulong4 v4;
+#endif
+}cl_ulong4;
+
+/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. */
+typedef cl_ulong4 cl_ulong3;
+
+typedef union
+{
+ cl_ulong CL_ALIGNED(64) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_ulong x, y, z, w; };
+ __extension__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7; };
+ __extension__ struct{ cl_ulong4 lo, hi; };
+#endif
+#if defined( __CL_ULONG2__)
+ __cl_ulong2 v2[4];
+#endif
+#if defined( __CL_ULONG4__)
+ __cl_ulong4 v4[2];
+#endif
+#if defined( __CL_ULONG8__ )
+ __cl_ulong8 v8;
+#endif
+}cl_ulong8;
+
+typedef union
+{
+ cl_ulong CL_ALIGNED(128) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_ulong x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __extension__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __extension__ struct{ cl_ulong8 lo, hi; };
+#endif
+#if defined( __CL_ULONG2__)
+ __cl_ulong2 v2[8];
+#endif
+#if defined( __CL_ULONG4__)
+ __cl_ulong4 v4[4];
+#endif
+#if defined( __CL_ULONG8__ )
+ __cl_ulong8 v8[2];
+#endif
+#if defined( __CL_ULONG16__ )
+ __cl_ulong16 v16;
+#endif
+}cl_ulong16;
+
+
+/* --- cl_floatn ---- */
+
+typedef union
+{
+ cl_float CL_ALIGNED(8) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_float x, y; };
+ __extension__ struct{ cl_float s0, s1; };
+ __extension__ struct{ cl_float lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__)
+ __cl_float2 v2;
+#endif
+}cl_float2;
+
+typedef union
+{
+ cl_float CL_ALIGNED(16) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_float x, y, z, w; };
+ __extension__ struct{ cl_float s0, s1, s2, s3; };
+ __extension__ struct{ cl_float2 lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__)
+ __cl_float2 v2[2];
+#endif
+#if defined( __CL_FLOAT4__)
+ __cl_float4 v4;
+#endif
+}cl_float4;
+
+/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. */
+typedef cl_float4 cl_float3;
+
+typedef union
+{
+ cl_float CL_ALIGNED(32) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_float x, y, z, w; };
+ __extension__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7; };
+ __extension__ struct{ cl_float4 lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__)
+ __cl_float2 v2[4];
+#endif
+#if defined( __CL_FLOAT4__)
+ __cl_float4 v4[2];
+#endif
+#if defined( __CL_FLOAT8__ )
+ __cl_float8 v8;
+#endif
+}cl_float8;
+
+typedef union
+{
+ cl_float CL_ALIGNED(64) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_float x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __extension__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __extension__ struct{ cl_float8 lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__)
+ __cl_float2 v2[8];
+#endif
+#if defined( __CL_FLOAT4__)
+ __cl_float4 v4[4];
+#endif
+#if defined( __CL_FLOAT8__ )
+ __cl_float8 v8[2];
+#endif
+#if defined( __CL_FLOAT16__ )
+ __cl_float16 v16;
+#endif
+}cl_float16;
+
+/* --- cl_doublen ---- */
+
+typedef union
+{
+ cl_double CL_ALIGNED(16) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_double x, y; };
+ __extension__ struct{ cl_double s0, s1; };
+ __extension__ struct{ cl_double lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__)
+ __cl_double2 v2;
+#endif
+}cl_double2;
+
+typedef union
+{
+ cl_double CL_ALIGNED(32) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_double x, y, z, w; };
+ __extension__ struct{ cl_double s0, s1, s2, s3; };
+ __extension__ struct{ cl_double2 lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__)
+ __cl_double2 v2[2];
+#endif
+#if defined( __CL_DOUBLE4__)
+ __cl_double4 v4;
+#endif
+}cl_double4;
+
+/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */
+typedef cl_double4 cl_double3;
+
+typedef union
+{
+ cl_double CL_ALIGNED(64) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_double x, y, z, w; };
+ __extension__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7; };
+ __extension__ struct{ cl_double4 lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__)
+ __cl_double2 v2[4];
+#endif
+#if defined( __CL_DOUBLE4__)
+ __cl_double4 v4[2];
+#endif
+#if defined( __CL_DOUBLE8__ )
+ __cl_double8 v8;
+#endif
+}cl_double8;
+
+typedef union
+{
+ cl_double CL_ALIGNED(128) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_double x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __extension__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __extension__ struct{ cl_double8 lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__)
+ __cl_double2 v2[8];
+#endif
+#if defined( __CL_DOUBLE4__)
+ __cl_double4 v4[4];
+#endif
+#if defined( __CL_DOUBLE8__ )
+ __cl_double8 v8[2];
+#endif
+#if defined( __CL_DOUBLE16__ )
+ __cl_double16 v16;
+#endif
+}cl_double16;
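
[Editor's note, not part of the imported header: each cl_<type>n union above exposes the same CL_ALIGNED storage through the s[] array, the optional anonymous-struct views (.x/.y, .s0/.s1, .lo/.hi) and the native vector members, so every view aliases the same bytes, and each cl_<type>3 is a typedef of the corresponding 4-wide type. A minimal host-side sketch, assuming GCC or another compiler that accepts the anonymous-struct extension and that <CL/cl_platform.h> is on the include path:]

#include <stdio.h>
#include <CL/cl_platform.h>

int main(void)
{
    cl_float4 v;
    v.s[0] = 1.0f; v.s[1] = 2.0f; v.s[2] = 3.0f; v.s[3] = 4.0f;
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
    /* .x, .s0 and .lo.s0 all alias s[0], so the three printed values are equal. */
    printf("%f %f %f\n", v.x, v.s0, v.lo.s0);
#endif
    /* cl_float3 is a typedef of cl_float4, so the sizes match by construction. */
    printf("sizeof(cl_float3)=%zu sizeof(cl_float4)=%zu\n",
           sizeof(cl_float3), sizeof(cl_float4));
    return 0;
}
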
+
+/* Macro to facilitate debugging
+ * Usage:
+ * Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source.
+ * The first line ends with: CL_PROGRAM_STRING_DEBUG_INFO \"
+ * Each line thereafter of OpenCL C source must end with: \n\
+ * The last line ends in ";
+ *
+ * Example:
+ *
+ * const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\
+ * kernel void foo( int a, float * b ) \n\
+ * { \n\
+ * // my comment \n\
+ * b[ get_global_id(0)] = a; \n\
+ * } \n\
+ * ";
+ *
+ * This should correctly set up the line, (column) and file information for your source
+ * string so you can do source level debugging.
+ */
+#define __CL_STRINGIFY( _x ) # _x
+#define _CL_STRINGIFY( _x ) __CL_STRINGIFY( _x )
+#define CL_PROGRAM_STRING_DEBUG_INFO "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n"
+
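[Editor's note, not part of the imported header: the two-level stringify above lets __LINE__ expand to its numeric value before # turns it into a string, so CL_PROGRAM_STRING_DEBUG_INFO evaluates to a "#line <n> \"<file>\"" directive for the host file that uses it. A minimal host-side check, assuming <CL/cl_platform.h> is installed:]

#include <stdio.h>
#include <CL/cl_platform.h>

int main(void)
{
    /* Prints something like: #line 7 "demo.c" (the values depend on this file). */
    fputs(CL_PROGRAM_STRING_DEBUG_INFO, stdout);
    return 0;
}
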
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __CL_PLATFORM_H */
diff --git a/include/CL/glext.h b/include/CL/glext.h
new file mode 100644
index 0000000..ad00446
--- /dev/null
+++ b/include/CL/glext.h
@@ -0,0 +1,8662 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __glext_h_
+#define __glext_h_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** Copyright (c) 2007 The Khronos Group Inc.
+**
+** Permission is hereby granted, free of charge, to any person obtaining a
+** copy of this software and/or associated documentation files (the
+** "Materials"), to deal in the Materials without restriction, including
+** without limitation the rights to use, copy, modify, merge, publish,
+** distribute, sublicense, and/or sell copies of the Materials, and to
+** permit persons to whom the Materials are furnished to do so, subject to
+** the following conditions:
+**
+** The above copyright notice and this permission notice shall be included
+** in all copies or substantial portions of the Materials.
+**
+** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+*/
+
+#if defined(_WIN32) && !defined(APIENTRY) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__)
+#define WIN32_LEAN_AND_MEAN 1
+#include <windows.h>
+#endif
+
+#ifndef APIENTRY
+#define APIENTRY
+#endif
+#ifndef APIENTRYP
+#define APIENTRYP APIENTRY *
+#endif
+#ifndef GLAPI
+#define GLAPI extern
+#endif
+
+/*************************************************************/
+
+/* Header file version number, required by OpenGL ABI for Linux */
+/* glext.h last updated 2009/03/19 */
+/* Current version at http://www.opengl.org/registry/ */
+#define GL_GLEXT_VERSION 48
+
+#ifndef GL_VERSION_1_2
+#define GL_UNSIGNED_BYTE_3_3_2 0x8032
+#define GL_UNSIGNED_SHORT_4_4_4_4 0x8033
+#define GL_UNSIGNED_SHORT_5_5_5_1 0x8034
+#define GL_UNSIGNED_INT_8_8_8_8 0x8035
+#define GL_UNSIGNED_INT_10_10_10_2 0x8036
+#define GL_RESCALE_NORMAL 0x803A
+#define GL_TEXTURE_BINDING_3D 0x806A
+#define GL_PACK_SKIP_IMAGES 0x806B
+#define GL_PACK_IMAGE_HEIGHT 0x806C
+#define GL_UNPACK_SKIP_IMAGES 0x806D
+#define GL_UNPACK_IMAGE_HEIGHT 0x806E
+#define GL_TEXTURE_3D 0x806F
+#define GL_PROXY_TEXTURE_3D 0x8070
+#define GL_TEXTURE_DEPTH 0x8071
+#define GL_TEXTURE_WRAP_R 0x8072
+#define GL_MAX_3D_TEXTURE_SIZE 0x8073
+#define GL_UNSIGNED_BYTE_2_3_3_REV 0x8362
+#define GL_UNSIGNED_SHORT_5_6_5 0x8363
+#define GL_UNSIGNED_SHORT_5_6_5_REV 0x8364
+#define GL_UNSIGNED_SHORT_4_4_4_4_REV 0x8365
+#define GL_UNSIGNED_SHORT_1_5_5_5_REV 0x8366
+#define GL_UNSIGNED_INT_8_8_8_8_REV 0x8367
+#define GL_UNSIGNED_INT_2_10_10_10_REV 0x8368
+#define GL_BGR 0x80E0
+#define GL_BGRA 0x80E1
+#define GL_MAX_ELEMENTS_VERTICES 0x80E8
+#define GL_MAX_ELEMENTS_INDICES 0x80E9
+#define GL_CLAMP_TO_EDGE 0x812F
+#define GL_TEXTURE_MIN_LOD 0x813A
+#define GL_TEXTURE_MAX_LOD 0x813B
+#define GL_TEXTURE_BASE_LEVEL 0x813C
+#define GL_TEXTURE_MAX_LEVEL 0x813D
+#define GL_LIGHT_MODEL_COLOR_CONTROL 0x81F8
+#define GL_SINGLE_COLOR 0x81F9
+#define GL_SEPARATE_SPECULAR_COLOR 0x81FA
+#define GL_SMOOTH_POINT_SIZE_RANGE 0x0B12
+#define GL_SMOOTH_POINT_SIZE_GRANULARITY 0x0B13
+#define GL_SMOOTH_LINE_WIDTH_RANGE 0x0B22
+#define GL_SMOOTH_LINE_WIDTH_GRANULARITY 0x0B23
+#define GL_ALIASED_POINT_SIZE_RANGE 0x846D
+#define GL_ALIASED_LINE_WIDTH_RANGE 0x846E
+#endif
+
+#ifndef GL_ARB_imaging
+#define GL_CONSTANT_COLOR 0x8001
+#define GL_ONE_MINUS_CONSTANT_COLOR 0x8002
+#define GL_CONSTANT_ALPHA 0x8003
+#define GL_ONE_MINUS_CONSTANT_ALPHA 0x8004
+#define GL_BLEND_COLOR 0x8005
+#define GL_FUNC_ADD 0x8006
+#define GL_MIN 0x8007
+#define GL_MAX 0x8008
+#define GL_BLEND_EQUATION 0x8009
+#define GL_FUNC_SUBTRACT 0x800A
+#define GL_FUNC_REVERSE_SUBTRACT 0x800B
+#define GL_CONVOLUTION_1D 0x8010
+#define GL_CONVOLUTION_2D 0x8011
+#define GL_SEPARABLE_2D 0x8012
+#define GL_CONVOLUTION_BORDER_MODE 0x8013
+#define GL_CONVOLUTION_FILTER_SCALE 0x8014
+#define GL_CONVOLUTION_FILTER_BIAS 0x8015
+#define GL_REDUCE 0x8016
+#define GL_CONVOLUTION_FORMAT 0x8017
+#define GL_CONVOLUTION_WIDTH 0x8018
+#define GL_CONVOLUTION_HEIGHT 0x8019
+#define GL_MAX_CONVOLUTION_WIDTH 0x801A
+#define GL_MAX_CONVOLUTION_HEIGHT 0x801B
+#define GL_POST_CONVOLUTION_RED_SCALE 0x801C
+#define GL_POST_CONVOLUTION_GREEN_SCALE 0x801D
+#define GL_POST_CONVOLUTION_BLUE_SCALE 0x801E
+#define GL_POST_CONVOLUTION_ALPHA_SCALE 0x801F
+#define GL_POST_CONVOLUTION_RED_BIAS 0x8020
+#define GL_POST_CONVOLUTION_GREEN_BIAS 0x8021
+#define GL_POST_CONVOLUTION_BLUE_BIAS 0x8022
+#define GL_POST_CONVOLUTION_ALPHA_BIAS 0x8023
+#define GL_HISTOGRAM 0x8024
+#define GL_PROXY_HISTOGRAM 0x8025
+#define GL_HISTOGRAM_WIDTH 0x8026
+#define GL_HISTOGRAM_FORMAT 0x8027
+#define GL_HISTOGRAM_RED_SIZE 0x8028
+#define GL_HISTOGRAM_GREEN_SIZE 0x8029
+#define GL_HISTOGRAM_BLUE_SIZE 0x802A
+#define GL_HISTOGRAM_ALPHA_SIZE 0x802B
+#define GL_HISTOGRAM_LUMINANCE_SIZE 0x802C
+#define GL_HISTOGRAM_SINK 0x802D
+#define GL_MINMAX 0x802E
+#define GL_MINMAX_FORMAT 0x802F
+#define GL_MINMAX_SINK 0x8030
+#define GL_TABLE_TOO_LARGE 0x8031
+#define GL_COLOR_MATRIX 0x80B1
+#define GL_COLOR_MATRIX_STACK_DEPTH 0x80B2
+#define GL_MAX_COLOR_MATRIX_STACK_DEPTH 0x80B3
+#define GL_POST_COLOR_MATRIX_RED_SCALE 0x80B4
+#define GL_POST_COLOR_MATRIX_GREEN_SCALE 0x80B5
+#define GL_POST_COLOR_MATRIX_BLUE_SCALE 0x80B6
+#define GL_POST_COLOR_MATRIX_ALPHA_SCALE 0x80B7
+#define GL_POST_COLOR_MATRIX_RED_BIAS 0x80B8
+#define GL_POST_COLOR_MATRIX_GREEN_BIAS 0x80B9
+#define GL_POST_COLOR_MATRIX_BLUE_BIAS 0x80BA
+#define GL_POST_COLOR_MATRIX_ALPHA_BIAS 0x80BB
+#define GL_COLOR_TABLE 0x80D0
+#define GL_POST_CONVOLUTION_COLOR_TABLE 0x80D1
+#define GL_POST_COLOR_MATRIX_COLOR_TABLE 0x80D2
+#define GL_PROXY_COLOR_TABLE 0x80D3
+#define GL_PROXY_POST_CONVOLUTION_COLOR_TABLE 0x80D4
+#define GL_PROXY_POST_COLOR_MATRIX_COLOR_TABLE 0x80D5
+#define GL_COLOR_TABLE_SCALE 0x80D6
+#define GL_COLOR_TABLE_BIAS 0x80D7
+#define GL_COLOR_TABLE_FORMAT 0x80D8
+#define GL_COLOR_TABLE_WIDTH 0x80D9
+#define GL_COLOR_TABLE_RED_SIZE 0x80DA
+#define GL_COLOR_TABLE_GREEN_SIZE 0x80DB
+#define GL_COLOR_TABLE_BLUE_SIZE 0x80DC
+#define GL_COLOR_TABLE_ALPHA_SIZE 0x80DD
+#define GL_COLOR_TABLE_LUMINANCE_SIZE 0x80DE
+#define GL_COLOR_TABLE_INTENSITY_SIZE 0x80DF
+#define GL_CONSTANT_BORDER 0x8151
+#define GL_REPLICATE_BORDER 0x8153
+#define GL_CONVOLUTION_BORDER_COLOR 0x8154
+#endif
+
+#ifndef GL_VERSION_1_3
+#define GL_TEXTURE0 0x84C0
+#define GL_TEXTURE1 0x84C1
+#define GL_TEXTURE2 0x84C2
+#define GL_TEXTURE3 0x84C3
+#define GL_TEXTURE4 0x84C4
+#define GL_TEXTURE5 0x84C5
+#define GL_TEXTURE6 0x84C6
+#define GL_TEXTURE7 0x84C7
+#define GL_TEXTURE8 0x84C8
+#define GL_TEXTURE9 0x84C9
+#define GL_TEXTURE10 0x84CA
+#define GL_TEXTURE11 0x84CB
+#define GL_TEXTURE12 0x84CC
+#define GL_TEXTURE13 0x84CD
+#define GL_TEXTURE14 0x84CE
+#define GL_TEXTURE15 0x84CF
+#define GL_TEXTURE16 0x84D0
+#define GL_TEXTURE17 0x84D1
+#define GL_TEXTURE18 0x84D2
+#define GL_TEXTURE19 0x84D3
+#define GL_TEXTURE20 0x84D4
+#define GL_TEXTURE21 0x84D5
+#define GL_TEXTURE22 0x84D6
+#define GL_TEXTURE23 0x84D7
+#define GL_TEXTURE24 0x84D8
+#define GL_TEXTURE25 0x84D9
+#define GL_TEXTURE26 0x84DA
+#define GL_TEXTURE27 0x84DB
+#define GL_TEXTURE28 0x84DC
+#define GL_TEXTURE29 0x84DD
+#define GL_TEXTURE30 0x84DE
+#define GL_TEXTURE31 0x84DF
+#define GL_ACTIVE_TEXTURE 0x84E0
+#define GL_CLIENT_ACTIVE_TEXTURE 0x84E1
+#define GL_MAX_TEXTURE_UNITS 0x84E2
+#define GL_TRANSPOSE_MODELVIEW_MATRIX 0x84E3
+#define GL_TRANSPOSE_PROJECTION_MATRIX 0x84E4
+#define GL_TRANSPOSE_TEXTURE_MATRIX 0x84E5
+#define GL_TRANSPOSE_COLOR_MATRIX 0x84E6
+#define GL_MULTISAMPLE 0x809D
+#define GL_SAMPLE_ALPHA_TO_COVERAGE 0x809E
+#define GL_SAMPLE_ALPHA_TO_ONE 0x809F
+#define GL_SAMPLE_COVERAGE 0x80A0
+#define GL_SAMPLE_BUFFERS 0x80A8
+#define GL_SAMPLES 0x80A9
+#define GL_SAMPLE_COVERAGE_VALUE 0x80AA
+#define GL_SAMPLE_COVERAGE_INVERT 0x80AB
+#define GL_MULTISAMPLE_BIT 0x20000000
+#define GL_NORMAL_MAP 0x8511
+#define GL_REFLECTION_MAP 0x8512
+#define GL_TEXTURE_CUBE_MAP 0x8513
+#define GL_TEXTURE_BINDING_CUBE_MAP 0x8514
+#define GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x8515
+#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x8516
+#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x8517
+#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x8518
+#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x8519
+#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x851A
+#define GL_PROXY_TEXTURE_CUBE_MAP 0x851B
+#define GL_MAX_CUBE_MAP_TEXTURE_SIZE 0x851C
+#define GL_COMPRESSED_ALPHA 0x84E9
+#define GL_COMPRESSED_LUMINANCE 0x84EA
+#define GL_COMPRESSED_LUMINANCE_ALPHA 0x84EB
+#define GL_COMPRESSED_INTENSITY 0x84EC
+#define GL_COMPRESSED_RGB 0x84ED
+#define GL_COMPRESSED_RGBA 0x84EE
+#define GL_TEXTURE_COMPRESSION_HINT 0x84EF
+#define GL_TEXTURE_COMPRESSED_IMAGE_SIZE 0x86A0
+#define GL_TEXTURE_COMPRESSED 0x86A1
+#define GL_NUM_COMPRESSED_TEXTURE_FORMATS 0x86A2
+#define GL_COMPRESSED_TEXTURE_FORMATS 0x86A3
+#define GL_CLAMP_TO_BORDER 0x812D
+#define GL_COMBINE 0x8570
+#define GL_COMBINE_RGB 0x8571
+#define GL_COMBINE_ALPHA 0x8572
+#define GL_SOURCE0_RGB 0x8580
+#define GL_SOURCE1_RGB 0x8581
+#define GL_SOURCE2_RGB 0x8582
+#define GL_SOURCE0_ALPHA 0x8588
+#define GL_SOURCE1_ALPHA 0x8589
+#define GL_SOURCE2_ALPHA 0x858A
+#define GL_OPERAND0_RGB 0x8590
+#define GL_OPERAND1_RGB 0x8591
+#define GL_OPERAND2_RGB 0x8592
+#define GL_OPERAND0_ALPHA 0x8598
+#define GL_OPERAND1_ALPHA 0x8599
+#define GL_OPERAND2_ALPHA 0x859A
+#define GL_RGB_SCALE 0x8573
+#define GL_ADD_SIGNED 0x8574
+#define GL_INTERPOLATE 0x8575
+#define GL_SUBTRACT 0x84E7
+#define GL_CONSTANT 0x8576
+#define GL_PRIMARY_COLOR 0x8577
+#define GL_PREVIOUS 0x8578
+#define GL_DOT3_RGB 0x86AE
+#define GL_DOT3_RGBA 0x86AF
+#endif
+
+#ifndef GL_VERSION_1_4
+#define GL_BLEND_DST_RGB 0x80C8
+#define GL_BLEND_SRC_RGB 0x80C9
+#define GL_BLEND_DST_ALPHA 0x80CA
+#define GL_BLEND_SRC_ALPHA 0x80CB
+#define GL_POINT_SIZE_MIN 0x8126
+#define GL_POINT_SIZE_MAX 0x8127
+#define GL_POINT_FADE_THRESHOLD_SIZE 0x8128
+#define GL_POINT_DISTANCE_ATTENUATION 0x8129
+#define GL_GENERATE_MIPMAP 0x8191
+#define GL_GENERATE_MIPMAP_HINT 0x8192
+#define GL_DEPTH_COMPONENT16 0x81A5
+#define GL_DEPTH_COMPONENT24 0x81A6
+#define GL_DEPTH_COMPONENT32 0x81A7
+#define GL_MIRRORED_REPEAT 0x8370
+#define GL_FOG_COORDINATE_SOURCE 0x8450
+#define GL_FOG_COORDINATE 0x8451
+#define GL_FRAGMENT_DEPTH 0x8452
+#define GL_CURRENT_FOG_COORDINATE 0x8453
+#define GL_FOG_COORDINATE_ARRAY_TYPE 0x8454
+#define GL_FOG_COORDINATE_ARRAY_STRIDE 0x8455
+#define GL_FOG_COORDINATE_ARRAY_POINTER 0x8456
+#define GL_FOG_COORDINATE_ARRAY 0x8457
+#define GL_COLOR_SUM 0x8458
+#define GL_CURRENT_SECONDARY_COLOR 0x8459
+#define GL_SECONDARY_COLOR_ARRAY_SIZE 0x845A
+#define GL_SECONDARY_COLOR_ARRAY_TYPE 0x845B
+#define GL_SECONDARY_COLOR_ARRAY_STRIDE 0x845C
+#define GL_SECONDARY_COLOR_ARRAY_POINTER 0x845D
+#define GL_SECONDARY_COLOR_ARRAY 0x845E
+#define GL_MAX_TEXTURE_LOD_BIAS 0x84FD
+#define GL_TEXTURE_FILTER_CONTROL 0x8500
+#define GL_TEXTURE_LOD_BIAS 0x8501
+#define GL_INCR_WRAP 0x8507
+#define GL_DECR_WRAP 0x8508
+#define GL_TEXTURE_DEPTH_SIZE 0x884A
+#define GL_DEPTH_TEXTURE_MODE 0x884B
+#define GL_TEXTURE_COMPARE_MODE 0x884C
+#define GL_TEXTURE_COMPARE_FUNC 0x884D
+#define GL_COMPARE_R_TO_TEXTURE 0x884E
+#endif
+
+#ifndef GL_VERSION_1_5
+#define GL_BUFFER_SIZE 0x8764
+#define GL_BUFFER_USAGE 0x8765
+#define GL_QUERY_COUNTER_BITS 0x8864
+#define GL_CURRENT_QUERY 0x8865
+#define GL_QUERY_RESULT 0x8866
+#define GL_QUERY_RESULT_AVAILABLE 0x8867
+#define GL_ARRAY_BUFFER 0x8892
+#define GL_ELEMENT_ARRAY_BUFFER 0x8893
+#define GL_ARRAY_BUFFER_BINDING 0x8894
+#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895
+#define GL_VERTEX_ARRAY_BUFFER_BINDING 0x8896
+#define GL_NORMAL_ARRAY_BUFFER_BINDING 0x8897
+#define GL_COLOR_ARRAY_BUFFER_BINDING 0x8898
+#define GL_INDEX_ARRAY_BUFFER_BINDING 0x8899
+#define GL_TEXTURE_COORD_ARRAY_BUFFER_BINDING 0x889A
+#define GL_EDGE_FLAG_ARRAY_BUFFER_BINDING 0x889B
+#define GL_SECONDARY_COLOR_ARRAY_BUFFER_BINDING 0x889C
+#define GL_FOG_COORDINATE_ARRAY_BUFFER_BINDING 0x889D
+#define GL_WEIGHT_ARRAY_BUFFER_BINDING 0x889E
+#define GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING 0x889F
+#define GL_READ_ONLY 0x88B8
+#define GL_WRITE_ONLY 0x88B9
+#define GL_READ_WRITE 0x88BA
+#define GL_BUFFER_ACCESS 0x88BB
+#define GL_BUFFER_MAPPED 0x88BC
+#define GL_BUFFER_MAP_POINTER 0x88BD
+#define GL_STREAM_DRAW 0x88E0
+#define GL_STREAM_READ 0x88E1
+#define GL_STREAM_COPY 0x88E2
+#define GL_STATIC_DRAW 0x88E4
+#define GL_STATIC_READ 0x88E5
+#define GL_STATIC_COPY 0x88E6
+#define GL_DYNAMIC_DRAW 0x88E8
+#define GL_DYNAMIC_READ 0x88E9
+#define GL_DYNAMIC_COPY 0x88EA
+#define GL_SAMPLES_PASSED 0x8914
+#define GL_FOG_COORD_SRC GL_FOG_COORDINATE_SOURCE
+#define GL_FOG_COORD GL_FOG_COORDINATE
+#define GL_CURRENT_FOG_COORD GL_CURRENT_FOG_COORDINATE
+#define GL_FOG_COORD_ARRAY_TYPE GL_FOG_COORDINATE_ARRAY_TYPE
+#define GL_FOG_COORD_ARRAY_STRIDE GL_FOG_COORDINATE_ARRAY_STRIDE
+#define GL_FOG_COORD_ARRAY_POINTER GL_FOG_COORDINATE_ARRAY_POINTER
+#define GL_FOG_COORD_ARRAY GL_FOG_COORDINATE_ARRAY
+#define GL_FOG_COORD_ARRAY_BUFFER_BINDING GL_FOG_COORDINATE_ARRAY_BUFFER_BINDING
+#define GL_SRC0_RGB GL_SOURCE0_RGB
+#define GL_SRC1_RGB GL_SOURCE1_RGB
+#define GL_SRC2_RGB GL_SOURCE2_RGB
+#define GL_SRC0_ALPHA GL_SOURCE0_ALPHA
+#define GL_SRC1_ALPHA GL_SOURCE1_ALPHA
+#define GL_SRC2_ALPHA GL_SOURCE2_ALPHA
+#endif
+
+#ifndef GL_VERSION_2_0
+#define GL_BLEND_EQUATION_RGB GL_BLEND_EQUATION
+#define GL_VERTEX_ATTRIB_ARRAY_ENABLED 0x8622
+#define GL_VERTEX_ATTRIB_ARRAY_SIZE 0x8623
+#define GL_VERTEX_ATTRIB_ARRAY_STRIDE 0x8624
+#define GL_VERTEX_ATTRIB_ARRAY_TYPE 0x8625
+#define GL_CURRENT_VERTEX_ATTRIB 0x8626
+#define GL_VERTEX_PROGRAM_POINT_SIZE 0x8642
+#define GL_VERTEX_PROGRAM_TWO_SIDE 0x8643
+#define GL_VERTEX_ATTRIB_ARRAY_POINTER 0x8645
+#define GL_STENCIL_BACK_FUNC 0x8800
+#define GL_STENCIL_BACK_FAIL 0x8801
+#define GL_STENCIL_BACK_PASS_DEPTH_FAIL 0x8802
+#define GL_STENCIL_BACK_PASS_DEPTH_PASS 0x8803
+#define GL_MAX_DRAW_BUFFERS 0x8824
+#define GL_DRAW_BUFFER0 0x8825
+#define GL_DRAW_BUFFER1 0x8826
+#define GL_DRAW_BUFFER2 0x8827
+#define GL_DRAW_BUFFER3 0x8828
+#define GL_DRAW_BUFFER4 0x8829
+#define GL_DRAW_BUFFER5 0x882A
+#define GL_DRAW_BUFFER6 0x882B
+#define GL_DRAW_BUFFER7 0x882C
+#define GL_DRAW_BUFFER8 0x882D
+#define GL_DRAW_BUFFER9 0x882E
+#define GL_DRAW_BUFFER10 0x882F
+#define GL_DRAW_BUFFER11 0x8830
+#define GL_DRAW_BUFFER12 0x8831
+#define GL_DRAW_BUFFER13 0x8832
+#define GL_DRAW_BUFFER14 0x8833
+#define GL_DRAW_BUFFER15 0x8834
+#define GL_BLEND_EQUATION_ALPHA 0x883D
+#define GL_POINT_SPRITE 0x8861
+#define GL_COORD_REPLACE 0x8862
+#define GL_MAX_VERTEX_ATTRIBS 0x8869
+#define GL_VERTEX_ATTRIB_ARRAY_NORMALIZED 0x886A
+#define GL_MAX_TEXTURE_COORDS 0x8871
+#define GL_MAX_TEXTURE_IMAGE_UNITS 0x8872
+#define GL_FRAGMENT_SHADER 0x8B30
+#define GL_VERTEX_SHADER 0x8B31
+#define GL_MAX_FRAGMENT_UNIFORM_COMPONENTS 0x8B49
+#define GL_MAX_VERTEX_UNIFORM_COMPONENTS 0x8B4A
+#define GL_MAX_VARYING_FLOATS 0x8B4B
+#define GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS 0x8B4C
+#define GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS 0x8B4D
+#define GL_SHADER_TYPE 0x8B4F
+#define GL_FLOAT_VEC2 0x8B50
+#define GL_FLOAT_VEC3 0x8B51
+#define GL_FLOAT_VEC4 0x8B52
+#define GL_INT_VEC2 0x8B53
+#define GL_INT_VEC3 0x8B54
+#define GL_INT_VEC4 0x8B55
+#define GL_BOOL 0x8B56
+#define GL_BOOL_VEC2 0x8B57
+#define GL_BOOL_VEC3 0x8B58
+#define GL_BOOL_VEC4 0x8B59
+#define GL_FLOAT_MAT2 0x8B5A
+#define GL_FLOAT_MAT3 0x8B5B
+#define GL_FLOAT_MAT4 0x8B5C
+#define GL_SAMPLER_1D 0x8B5D
+#define GL_SAMPLER_2D 0x8B5E
+#define GL_SAMPLER_3D 0x8B5F
+#define GL_SAMPLER_CUBE 0x8B60
+#define GL_SAMPLER_1D_SHADOW 0x8B61
+#define GL_SAMPLER_2D_SHADOW 0x8B62
+#define GL_DELETE_STATUS 0x8B80
+#define GL_COMPILE_STATUS 0x8B81
+#define GL_LINK_STATUS 0x8B82
+#define GL_VALIDATE_STATUS 0x8B83
+#define GL_INFO_LOG_LENGTH 0x8B84
+#define GL_ATTACHED_SHADERS 0x8B85
+#define GL_ACTIVE_UNIFORMS 0x8B86
+#define GL_ACTIVE_UNIFORM_MAX_LENGTH 0x8B87
+#define GL_SHADER_SOURCE_LENGTH 0x8B88
+#define GL_ACTIVE_ATTRIBUTES 0x8B89
+#define GL_ACTIVE_ATTRIBUTE_MAX_LENGTH 0x8B8A
+#define GL_FRAGMENT_SHADER_DERIVATIVE_HINT 0x8B8B
+#define GL_SHADING_LANGUAGE_VERSION 0x8B8C
+#define GL_CURRENT_PROGRAM 0x8B8D
+#define GL_POINT_SPRITE_COORD_ORIGIN 0x8CA0
+#define GL_LOWER_LEFT 0x8CA1
+#define GL_UPPER_LEFT 0x8CA2
+#define GL_STENCIL_BACK_REF 0x8CA3
+#define GL_STENCIL_BACK_VALUE_MASK 0x8CA4
+#define GL_STENCIL_BACK_WRITEMASK 0x8CA5
+#endif
+
+#ifndef GL_VERSION_2_1
+#define GL_CURRENT_RASTER_SECONDARY_COLOR 0x845F
+#define GL_PIXEL_PACK_BUFFER 0x88EB
+#define GL_PIXEL_UNPACK_BUFFER 0x88EC
+#define GL_PIXEL_PACK_BUFFER_BINDING 0x88ED
+#define GL_PIXEL_UNPACK_BUFFER_BINDING 0x88EF
+#define GL_FLOAT_MAT2x3 0x8B65
+#define GL_FLOAT_MAT2x4 0x8B66
+#define GL_FLOAT_MAT3x2 0x8B67
+#define GL_FLOAT_MAT3x4 0x8B68
+#define GL_FLOAT_MAT4x2 0x8B69
+#define GL_FLOAT_MAT4x3 0x8B6A
+#define GL_SRGB 0x8C40
+#define GL_SRGB8 0x8C41
+#define GL_SRGB_ALPHA 0x8C42
+#define GL_SRGB8_ALPHA8 0x8C43
+#define GL_SLUMINANCE_ALPHA 0x8C44
+#define GL_SLUMINANCE8_ALPHA8 0x8C45
+#define GL_SLUMINANCE 0x8C46
+#define GL_SLUMINANCE8 0x8C47
+#define GL_COMPRESSED_SRGB 0x8C48
+#define GL_COMPRESSED_SRGB_ALPHA 0x8C49
+#define GL_COMPRESSED_SLUMINANCE 0x8C4A
+#define GL_COMPRESSED_SLUMINANCE_ALPHA 0x8C4B
+#endif
+
+#ifndef GL_VERSION_3_0
+#define GL_COMPARE_REF_TO_TEXTURE GL_COMPARE_R_TO_TEXTURE_ARB
+#define GL_CLIP_DISTANCE0 GL_CLIP_PLANE0
+#define GL_CLIP_DISTANCE1 GL_CLIP_PLANE1
+#define GL_CLIP_DISTANCE2 GL_CLIP_PLANE2
+#define GL_CLIP_DISTANCE3 GL_CLIP_PLANE3
+#define GL_CLIP_DISTANCE4 GL_CLIP_PLANE4
+#define GL_CLIP_DISTANCE5 GL_CLIP_PLANE5
+#define GL_MAX_CLIP_DISTANCES GL_MAX_CLIP_PLANES
+#define GL_MAJOR_VERSION 0x821B
+#define GL_MINOR_VERSION 0x821C
+#define GL_NUM_EXTENSIONS 0x821D
+#define GL_CONTEXT_FLAGS 0x821E
+#define GL_DEPTH_BUFFER 0x8223
+#define GL_STENCIL_BUFFER 0x8224
+#define GL_COMPRESSED_RED 0x8225
+#define GL_COMPRESSED_RG 0x8226
+#define GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT 0x0001
+#define GL_RGBA32F 0x8814
+#define GL_RGB32F 0x8815
+#define GL_RGBA16F 0x881A
+#define GL_RGB16F 0x881B
+#define GL_VERTEX_ATTRIB_ARRAY_INTEGER 0x88FD
+#define GL_MAX_ARRAY_TEXTURE_LAYERS 0x88FF
+#define GL_MIN_PROGRAM_TEXEL_OFFSET 0x8904
+#define GL_MAX_PROGRAM_TEXEL_OFFSET 0x8905
+#define GL_CLAMP_VERTEX_COLOR 0x891A
+#define GL_CLAMP_FRAGMENT_COLOR 0x891B
+#define GL_CLAMP_READ_COLOR 0x891C
+#define GL_FIXED_ONLY 0x891D
+#define GL_MAX_VARYING_COMPONENTS GL_MAX_VARYING_FLOATS
+#define GL_TEXTURE_RED_TYPE 0x8C10
+#define GL_TEXTURE_GREEN_TYPE 0x8C11
+#define GL_TEXTURE_BLUE_TYPE 0x8C12
+#define GL_TEXTURE_ALPHA_TYPE 0x8C13
+#define GL_TEXTURE_LUMINANCE_TYPE 0x8C14
+#define GL_TEXTURE_INTENSITY_TYPE 0x8C15
+#define GL_TEXTURE_DEPTH_TYPE 0x8C16
+#define GL_UNSIGNED_NORMALIZED 0x8C17
+#define GL_TEXTURE_1D_ARRAY 0x8C18
+#define GL_PROXY_TEXTURE_1D_ARRAY 0x8C19
+#define GL_TEXTURE_2D_ARRAY 0x8C1A
+#define GL_PROXY_TEXTURE_2D_ARRAY 0x8C1B
+#define GL_TEXTURE_BINDING_1D_ARRAY 0x8C1C
+#define GL_TEXTURE_BINDING_2D_ARRAY 0x8C1D
+#define GL_R11F_G11F_B10F 0x8C3A
+#define GL_UNSIGNED_INT_10F_11F_11F_REV 0x8C3B
+#define GL_RGB9_E5 0x8C3D
+#define GL_UNSIGNED_INT_5_9_9_9_REV 0x8C3E
+#define GL_TEXTURE_SHARED_SIZE 0x8C3F
+#define GL_TRANSFORM_FEEDBACK_VARYING_MAX_LENGTH 0x8C76
+#define GL_TRANSFORM_FEEDBACK_BUFFER_MODE 0x8C7F
+#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS 0x8C80
+#define GL_TRANSFORM_FEEDBACK_VARYINGS 0x8C83
+#define GL_TRANSFORM_FEEDBACK_BUFFER_START 0x8C84
+#define GL_TRANSFORM_FEEDBACK_BUFFER_SIZE 0x8C85
+#define GL_PRIMITIVES_GENERATED 0x8C87
+#define GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN 0x8C88
+#define GL_RASTERIZER_DISCARD 0x8C89
+#define GL_MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS 0x8C8A
+#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS 0x8C8B
+#define GL_INTERLEAVED_ATTRIBS 0x8C8C
+#define GL_SEPARATE_ATTRIBS 0x8C8D
+#define GL_TRANSFORM_FEEDBACK_BUFFER 0x8C8E
+#define GL_TRANSFORM_FEEDBACK_BUFFER_BINDING 0x8C8F
+#define GL_RGBA32UI 0x8D70
+#define GL_RGB32UI 0x8D71
+#define GL_RGBA16UI 0x8D76
+#define GL_RGB16UI 0x8D77
+#define GL_RGBA8UI 0x8D7C
+#define GL_RGB8UI 0x8D7D
+#define GL_RGBA32I 0x8D82
+#define GL_RGB32I 0x8D83
+#define GL_RGBA16I 0x8D88
+#define GL_RGB16I 0x8D89
+#define GL_RGBA8I 0x8D8E
+#define GL_RGB8I 0x8D8F
+#define GL_RED_INTEGER 0x8D94
+#define GL_GREEN_INTEGER 0x8D95
+#define GL_BLUE_INTEGER 0x8D96
+#define GL_ALPHA_INTEGER 0x8D97
+#define GL_RGB_INTEGER 0x8D98
+#define GL_RGBA_INTEGER 0x8D99
+#define GL_BGR_INTEGER 0x8D9A
+#define GL_BGRA_INTEGER 0x8D9B
+#define GL_SAMPLER_1D_ARRAY 0x8DC0
+#define GL_SAMPLER_2D_ARRAY 0x8DC1
+#define GL_SAMPLER_1D_ARRAY_SHADOW 0x8DC3
+#define GL_SAMPLER_2D_ARRAY_SHADOW 0x8DC4
+#define GL_SAMPLER_CUBE_SHADOW 0x8DC5
+#define GL_UNSIGNED_INT_VEC2 0x8DC6
+#define GL_UNSIGNED_INT_VEC3 0x8DC7
+#define GL_UNSIGNED_INT_VEC4 0x8DC8
+#define GL_INT_SAMPLER_1D 0x8DC9
+#define GL_INT_SAMPLER_2D 0x8DCA
+#define GL_INT_SAMPLER_3D 0x8DCB
+#define GL_INT_SAMPLER_CUBE 0x8DCC
+#define GL_INT_SAMPLER_1D_ARRAY 0x8DCE
+#define GL_INT_SAMPLER_2D_ARRAY 0x8DCF
+#define GL_UNSIGNED_INT_SAMPLER_1D 0x8DD1
+#define GL_UNSIGNED_INT_SAMPLER_2D 0x8DD2
+#define GL_UNSIGNED_INT_SAMPLER_3D 0x8DD3
+#define GL_UNSIGNED_INT_SAMPLER_CUBE 0x8DD4
+#define GL_UNSIGNED_INT_SAMPLER_1D_ARRAY 0x8DD6
+#define GL_UNSIGNED_INT_SAMPLER_2D_ARRAY 0x8DD7
+#define GL_QUERY_WAIT 0x8E13
+#define GL_QUERY_NO_WAIT 0x8E14
+#define GL_QUERY_BY_REGION_WAIT 0x8E15
+#define GL_QUERY_BY_REGION_NO_WAIT 0x8E16
+/* Reuse tokens from ARB_depth_buffer_float */
+/* reuse GL_DEPTH_COMPONENT32F */
+/* reuse GL_DEPTH32F_STENCIL8 */
+/* reuse GL_FLOAT_32_UNSIGNED_INT_24_8_REV */
+/* Reuse tokens from ARB_framebuffer_object */
+/* reuse GL_INVALID_FRAMEBUFFER_OPERATION */
+/* reuse GL_FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING */
+/* reuse GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE */
+/* reuse GL_FRAMEBUFFER_ATTACHMENT_RED_SIZE */
+/* reuse GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE */
+/* reuse GL_FRAMEBUFFER_ATTACHMENT_BLUE_SIZE */
+/* reuse GL_FRAMEBUFFER_ATTACHMENT_ALPHA_SIZE */
+/* reuse GL_FRAMEBUFFER_ATTACHMENT_DEPTH_SIZE */
+/* reuse GL_FRAMEBUFFER_ATTACHMENT_STENCIL_SIZE */
+/* reuse GL_FRAMEBUFFER_DEFAULT */
+/* reuse GL_FRAMEBUFFER_UNDEFINED */
+/* reuse GL_DEPTH_STENCIL_ATTACHMENT */
+/* reuse GL_INDEX */
+/* reuse GL_MAX_RENDERBUFFER_SIZE */
+/* reuse GL_DEPTH_STENCIL */
+/* reuse GL_UNSIGNED_INT_24_8 */
+/* reuse GL_DEPTH24_STENCIL8 */
+/* reuse GL_TEXTURE_STENCIL_SIZE */
+/* reuse GL_TEXTURE_RED_TYPE */
+/* reuse GL_TEXTURE_GREEN_TYPE */
+/* reuse GL_TEXTURE_BLUE_TYPE */
+/* reuse GL_TEXTURE_ALPHA_TYPE */
+/* reuse GL_TEXTURE_LUMINANCE_TYPE */
+/* reuse GL_TEXTURE_INTENSITY_TYPE */
+/* reuse GL_TEXTURE_DEPTH_TYPE */
+/* reuse GL_UNSIGNED_NORMALIZED */
+/* reuse GL_FRAMEBUFFER_BINDING */
+/* reuse GL_DRAW_FRAMEBUFFER_BINDING */
+/* reuse GL_RENDERBUFFER_BINDING */
+/* reuse GL_READ_FRAMEBUFFER */
+/* reuse GL_DRAW_FRAMEBUFFER */
+/* reuse GL_READ_FRAMEBUFFER_BINDING */
+/* reuse GL_RENDERBUFFER_SAMPLES */
+/* reuse GL_FRAMEBUFFER_ATTACHMENT_TYPE */
+/* reuse GL_FRAMEBUFFER_ATTACHMENT_NAME */
+/* reuse GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL */
+/* reuse GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE */
+/* reuse GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LAYER */
+/* reuse GL_FRAMEBUFFER_COMPLETE */
+/* reuse GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT */
+/* reuse GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT */
+/* reuse GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER */
+/* reuse GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER */
+/* reuse GL_FRAMEBUFFER_UNSUPPORTED */
+/* reuse GL_MAX_COLOR_ATTACHMENTS */
+/* reuse GL_COLOR_ATTACHMENT0 */
+/* reuse GL_COLOR_ATTACHMENT1 */
+/* reuse GL_COLOR_ATTACHMENT2 */
+/* reuse GL_COLOR_ATTACHMENT3 */
+/* reuse GL_COLOR_ATTACHMENT4 */
+/* reuse GL_COLOR_ATTACHMENT5 */
+/* reuse GL_COLOR_ATTACHMENT6 */
+/* reuse GL_COLOR_ATTACHMENT7 */
+/* reuse GL_COLOR_ATTACHMENT8 */
+/* reuse GL_COLOR_ATTACHMENT9 */
+/* reuse GL_COLOR_ATTACHMENT10 */
+/* reuse GL_COLOR_ATTACHMENT11 */
+/* reuse GL_COLOR_ATTACHMENT12 */
+/* reuse GL_COLOR_ATTACHMENT13 */
+/* reuse GL_COLOR_ATTACHMENT14 */
+/* reuse GL_COLOR_ATTACHMENT15 */
+/* reuse GL_DEPTH_ATTACHMENT */
+/* reuse GL_STENCIL_ATTACHMENT */
+/* reuse GL_FRAMEBUFFER */
+/* reuse GL_RENDERBUFFER */
+/* reuse GL_RENDERBUFFER_WIDTH */
+/* reuse GL_RENDERBUFFER_HEIGHT */
+/* reuse GL_RENDERBUFFER_INTERNAL_FORMAT */
+/* reuse GL_STENCIL_INDEX1 */
+/* reuse GL_STENCIL_INDEX4 */
+/* reuse GL_STENCIL_INDEX8 */
+/* reuse GL_STENCIL_INDEX16 */
+/* reuse GL_RENDERBUFFER_RED_SIZE */
+/* reuse GL_RENDERBUFFER_GREEN_SIZE */
+/* reuse GL_RENDERBUFFER_BLUE_SIZE */
+/* reuse GL_RENDERBUFFER_ALPHA_SIZE */
+/* reuse GL_RENDERBUFFER_DEPTH_SIZE */
+/* reuse GL_RENDERBUFFER_STENCIL_SIZE */
+/* reuse GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE */
+/* reuse GL_MAX_SAMPLES */
+/* Reuse tokens from ARB_framebuffer_sRGB */
+/* reuse GL_FRAMEBUFFER_SRGB */
+/* Reuse tokens from ARB_half_float_vertex */
+/* reuse GL_HALF_FLOAT */
+/* Reuse tokens from ARB_map_buffer_range */
+/* reuse GL_MAP_READ_BIT */
+/* reuse GL_MAP_WRITE_BIT */
+/* reuse GL_MAP_INVALIDATE_RANGE_BIT */
+/* reuse GL_MAP_INVALIDATE_BUFFER_BIT */
+/* reuse GL_MAP_FLUSH_EXPLICIT_BIT */
+/* reuse GL_MAP_UNSYNCHRONIZED_BIT */
+/* Reuse tokens from ARB_texture_compression_rgtc */
+/* reuse GL_COMPRESSED_RED_RGTC1 */
+/* reuse GL_COMPRESSED_SIGNED_RED_RGTC1 */
+/* reuse GL_COMPRESSED_RG_RGTC2 */
+/* reuse GL_COMPRESSED_SIGNED_RG_RGTC2 */
+/* Reuse tokens from ARB_texture_rg */
+/* reuse GL_RG */
+/* reuse GL_RG_INTEGER */
+/* reuse GL_R8 */
+/* reuse GL_R16 */
+/* reuse GL_RG8 */
+/* reuse GL_RG16 */
+/* reuse GL_R16F */
+/* reuse GL_R32F */
+/* reuse GL_RG16F */
+/* reuse GL_RG32F */
+/* reuse GL_R8I */
+/* reuse GL_R8UI */
+/* reuse GL_R16I */
+/* reuse GL_R16UI */
+/* reuse GL_R32I */
+/* reuse GL_R32UI */
+/* reuse GL_RG8I */
+/* reuse GL_RG8UI */
+/* reuse GL_RG16I */
+/* reuse GL_RG16UI */
+/* reuse GL_RG32I */
+/* reuse GL_RG32UI */
+/* Reuse tokens from ARB_vertex_array_object */
+/* reuse GL_VERTEX_ARRAY_BINDING */
+#endif
+
+#ifndef GL_ARB_multitexture
+#define GL_TEXTURE0_ARB 0x84C0
+#define GL_TEXTURE1_ARB 0x84C1
+#define GL_TEXTURE2_ARB 0x84C2
+#define GL_TEXTURE3_ARB 0x84C3
+#define GL_TEXTURE4_ARB 0x84C4
+#define GL_TEXTURE5_ARB 0x84C5
+#define GL_TEXTURE6_ARB 0x84C6
+#define GL_TEXTURE7_ARB 0x84C7
+#define GL_TEXTURE8_ARB 0x84C8
+#define GL_TEXTURE9_ARB 0x84C9
+#define GL_TEXTURE10_ARB 0x84CA
+#define GL_TEXTURE11_ARB 0x84CB
+#define GL_TEXTURE12_ARB 0x84CC
+#define GL_TEXTURE13_ARB 0x84CD
+#define GL_TEXTURE14_ARB 0x84CE
+#define GL_TEXTURE15_ARB 0x84CF
+#define GL_TEXTURE16_ARB 0x84D0
+#define GL_TEXTURE17_ARB 0x84D1
+#define GL_TEXTURE18_ARB 0x84D2
+#define GL_TEXTURE19_ARB 0x84D3
+#define GL_TEXTURE20_ARB 0x84D4
+#define GL_TEXTURE21_ARB 0x84D5
+#define GL_TEXTURE22_ARB 0x84D6
+#define GL_TEXTURE23_ARB 0x84D7
+#define GL_TEXTURE24_ARB 0x84D8
+#define GL_TEXTURE25_ARB 0x84D9
+#define GL_TEXTURE26_ARB 0x84DA
+#define GL_TEXTURE27_ARB 0x84DB
+#define GL_TEXTURE28_ARB 0x84DC
+#define GL_TEXTURE29_ARB 0x84DD
+#define GL_TEXTURE30_ARB 0x84DE
+#define GL_TEXTURE31_ARB 0x84DF
+#define GL_ACTIVE_TEXTURE_ARB 0x84E0
+#define GL_CLIENT_ACTIVE_TEXTURE_ARB 0x84E1
+#define GL_MAX_TEXTURE_UNITS_ARB 0x84E2
+#endif
+
+#ifndef GL_ARB_transpose_matrix
+#define GL_TRANSPOSE_MODELVIEW_MATRIX_ARB 0x84E3
+#define GL_TRANSPOSE_PROJECTION_MATRIX_ARB 0x84E4
+#define GL_TRANSPOSE_TEXTURE_MATRIX_ARB 0x84E5
+#define GL_TRANSPOSE_COLOR_MATRIX_ARB 0x84E6
+#endif
+
+#ifndef GL_ARB_multisample
+#define GL_MULTISAMPLE_ARB 0x809D
+#define GL_SAMPLE_ALPHA_TO_COVERAGE_ARB 0x809E
+#define GL_SAMPLE_ALPHA_TO_ONE_ARB 0x809F
+#define GL_SAMPLE_COVERAGE_ARB 0x80A0
+#define GL_SAMPLE_BUFFERS_ARB 0x80A8
+#define GL_SAMPLES_ARB 0x80A9
+#define GL_SAMPLE_COVERAGE_VALUE_ARB 0x80AA
+#define GL_SAMPLE_COVERAGE_INVERT_ARB 0x80AB
+#define GL_MULTISAMPLE_BIT_ARB 0x20000000
+#endif
+
+#ifndef GL_ARB_texture_env_add
+#endif
+
+#ifndef GL_ARB_texture_cube_map
+#define GL_NORMAL_MAP_ARB 0x8511
+#define GL_REFLECTION_MAP_ARB 0x8512
+#define GL_TEXTURE_CUBE_MAP_ARB 0x8513
+#define GL_TEXTURE_BINDING_CUBE_MAP_ARB 0x8514
+#define GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB 0x8515
+#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB 0x8516
+#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB 0x8517
+#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB 0x8518
+#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB 0x8519
+#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB 0x851A
+#define GL_PROXY_TEXTURE_CUBE_MAP_ARB 0x851B
+#define GL_MAX_CUBE_MAP_TEXTURE_SIZE_ARB 0x851C
+#endif
+
+#ifndef GL_ARB_texture_compression
+#define GL_COMPRESSED_ALPHA_ARB 0x84E9
+#define GL_COMPRESSED_LUMINANCE_ARB 0x84EA
+#define GL_COMPRESSED_LUMINANCE_ALPHA_ARB 0x84EB
+#define GL_COMPRESSED_INTENSITY_ARB 0x84EC
+#define GL_COMPRESSED_RGB_ARB 0x84ED
+#define GL_COMPRESSED_RGBA_ARB 0x84EE
+#define GL_TEXTURE_COMPRESSION_HINT_ARB 0x84EF
+#define GL_TEXTURE_COMPRESSED_IMAGE_SIZE_ARB 0x86A0
+#define GL_TEXTURE_COMPRESSED_ARB 0x86A1
+#define GL_NUM_COMPRESSED_TEXTURE_FORMATS_ARB 0x86A2
+#define GL_COMPRESSED_TEXTURE_FORMATS_ARB 0x86A3
+#endif
+
+#ifndef GL_ARB_texture_border_clamp
+#define GL_CLAMP_TO_BORDER_ARB 0x812D
+#endif
+
+#ifndef GL_ARB_point_parameters
+#define GL_POINT_SIZE_MIN_ARB 0x8126
+#define GL_POINT_SIZE_MAX_ARB 0x8127
+#define GL_POINT_FADE_THRESHOLD_SIZE_ARB 0x8128
+#define GL_POINT_DISTANCE_ATTENUATION_ARB 0x8129
+#endif
+
+#ifndef GL_ARB_vertex_blend
+#define GL_MAX_VERTEX_UNITS_ARB 0x86A4
+#define GL_ACTIVE_VERTEX_UNITS_ARB 0x86A5
+#define GL_WEIGHT_SUM_UNITY_ARB 0x86A6
+#define GL_VERTEX_BLEND_ARB 0x86A7
+#define GL_CURRENT_WEIGHT_ARB 0x86A8
+#define GL_WEIGHT_ARRAY_TYPE_ARB 0x86A9
+#define GL_WEIGHT_ARRAY_STRIDE_ARB 0x86AA
+#define GL_WEIGHT_ARRAY_SIZE_ARB 0x86AB
+#define GL_WEIGHT_ARRAY_POINTER_ARB 0x86AC
+#define GL_WEIGHT_ARRAY_ARB 0x86AD
+#define GL_MODELVIEW0_ARB 0x1700
+#define GL_MODELVIEW1_ARB 0x850A
+#define GL_MODELVIEW2_ARB 0x8722
+#define GL_MODELVIEW3_ARB 0x8723
+#define GL_MODELVIEW4_ARB 0x8724
+#define GL_MODELVIEW5_ARB 0x8725
+#define GL_MODELVIEW6_ARB 0x8726
+#define GL_MODELVIEW7_ARB 0x8727
+#define GL_MODELVIEW8_ARB 0x8728
+#define GL_MODELVIEW9_ARB 0x8729
+#define GL_MODELVIEW10_ARB 0x872A
+#define GL_MODELVIEW11_ARB 0x872B
+#define GL_MODELVIEW12_ARB 0x872C
+#define GL_MODELVIEW13_ARB 0x872D
+#define GL_MODELVIEW14_ARB 0x872E
+#define GL_MODELVIEW15_ARB 0x872F
+#define GL_MODELVIEW16_ARB 0x8730
+#define GL_MODELVIEW17_ARB 0x8731
+#define GL_MODELVIEW18_ARB 0x8732
+#define GL_MODELVIEW19_ARB 0x8733
+#define GL_MODELVIEW20_ARB 0x8734
+#define GL_MODELVIEW21_ARB 0x8735
+#define GL_MODELVIEW22_ARB 0x8736
+#define GL_MODELVIEW23_ARB 0x8737
+#define GL_MODELVIEW24_ARB 0x8738
+#define GL_MODELVIEW25_ARB 0x8739
+#define GL_MODELVIEW26_ARB 0x873A
+#define GL_MODELVIEW27_ARB 0x873B
+#define GL_MODELVIEW28_ARB 0x873C
+#define GL_MODELVIEW29_ARB 0x873D
+#define GL_MODELVIEW30_ARB 0x873E
+#define GL_MODELVIEW31_ARB 0x873F
+#endif
+
+#ifndef GL_ARB_matrix_palette
+#define GL_MATRIX_PALETTE_ARB 0x8840
+#define GL_MAX_MATRIX_PALETTE_STACK_DEPTH_ARB 0x8841
+#define GL_MAX_PALETTE_MATRICES_ARB 0x8842
+#define GL_CURRENT_PALETTE_MATRIX_ARB 0x8843
+#define GL_MATRIX_INDEX_ARRAY_ARB 0x8844
+#define GL_CURRENT_MATRIX_INDEX_ARB 0x8845
+#define GL_MATRIX_INDEX_ARRAY_SIZE_ARB 0x8846
+#define GL_MATRIX_INDEX_ARRAY_TYPE_ARB 0x8847
+#define GL_MATRIX_INDEX_ARRAY_STRIDE_ARB 0x8848
+#define GL_MATRIX_INDEX_ARRAY_POINTER_ARB 0x8849
+#endif
+
+#ifndef GL_ARB_texture_env_combine
+#define GL_COMBINE_ARB 0x8570
+#define GL_COMBINE_RGB_ARB 0x8571
+#define GL_COMBINE_ALPHA_ARB 0x8572
+#define GL_SOURCE0_RGB_ARB 0x8580
+#define GL_SOURCE1_RGB_ARB 0x8581
+#define GL_SOURCE2_RGB_ARB 0x8582
+#define GL_SOURCE0_ALPHA_ARB 0x8588
+#define GL_SOURCE1_ALPHA_ARB 0x8589
+#define GL_SOURCE2_ALPHA_ARB 0x858A
+#define GL_OPERAND0_RGB_ARB 0x8590
+#define GL_OPERAND1_RGB_ARB 0x8591
+#define GL_OPERAND2_RGB_ARB 0x8592
+#define GL_OPERAND0_ALPHA_ARB 0x8598
+#define GL_OPERAND1_ALPHA_ARB 0x8599
+#define GL_OPERAND2_ALPHA_ARB 0x859A
+#define GL_RGB_SCALE_ARB 0x8573
+#define GL_ADD_SIGNED_ARB 0x8574
+#define GL_INTERPOLATE_ARB 0x8575
+#define GL_SUBTRACT_ARB 0x84E7
+#define GL_CONSTANT_ARB 0x8576
+#define GL_PRIMARY_COLOR_ARB 0x8577
+#define GL_PREVIOUS_ARB 0x8578
+#endif
+
+#ifndef GL_ARB_texture_env_crossbar
+#endif
+
+#ifndef GL_ARB_texture_env_dot3
+#define GL_DOT3_RGB_ARB 0x86AE
+#define GL_DOT3_RGBA_ARB 0x86AF
+#endif
+
+#ifndef GL_ARB_texture_mirrored_repeat
+#define GL_MIRRORED_REPEAT_ARB 0x8370
+#endif
+
+#ifndef GL_ARB_depth_texture
+#define GL_DEPTH_COMPONENT16_ARB 0x81A5
+#define GL_DEPTH_COMPONENT24_ARB 0x81A6
+#define GL_DEPTH_COMPONENT32_ARB 0x81A7
+#define GL_TEXTURE_DEPTH_SIZE_ARB 0x884A
+#define GL_DEPTH_TEXTURE_MODE_ARB 0x884B
+#endif
+
+#ifndef GL_ARB_shadow
+#define GL_TEXTURE_COMPARE_MODE_ARB 0x884C
+#define GL_TEXTURE_COMPARE_FUNC_ARB 0x884D
+#define GL_COMPARE_R_TO_TEXTURE_ARB 0x884E
+#endif
+
+#ifndef GL_ARB_shadow_ambient
+#define GL_TEXTURE_COMPARE_FAIL_VALUE_ARB 0x80BF
+#endif
+
+#ifndef GL_ARB_window_pos
+#endif
+
+#ifndef GL_ARB_vertex_program
+#define GL_COLOR_SUM_ARB 0x8458
+#define GL_VERTEX_PROGRAM_ARB 0x8620
+#define GL_VERTEX_ATTRIB_ARRAY_ENABLED_ARB 0x8622
+#define GL_VERTEX_ATTRIB_ARRAY_SIZE_ARB 0x8623
+#define GL_VERTEX_ATTRIB_ARRAY_STRIDE_ARB 0x8624
+#define GL_VERTEX_ATTRIB_ARRAY_TYPE_ARB 0x8625
+#define GL_CURRENT_VERTEX_ATTRIB_ARB 0x8626
+#define GL_PROGRAM_LENGTH_ARB 0x8627
+#define GL_PROGRAM_STRING_ARB 0x8628
+#define GL_MAX_PROGRAM_MATRIX_STACK_DEPTH_ARB 0x862E
+#define GL_MAX_PROGRAM_MATRICES_ARB 0x862F
+#define GL_CURRENT_MATRIX_STACK_DEPTH_ARB 0x8640
+#define GL_CURRENT_MATRIX_ARB 0x8641
+#define GL_VERTEX_PROGRAM_POINT_SIZE_ARB 0x8642
+#define GL_VERTEX_PROGRAM_TWO_SIDE_ARB 0x8643
+#define GL_VERTEX_ATTRIB_ARRAY_POINTER_ARB 0x8645
+#define GL_PROGRAM_ERROR_POSITION_ARB 0x864B
+#define GL_PROGRAM_BINDING_ARB 0x8677
+#define GL_MAX_VERTEX_ATTRIBS_ARB 0x8869
+#define GL_VERTEX_ATTRIB_ARRAY_NORMALIZED_ARB 0x886A
+#define GL_PROGRAM_ERROR_STRING_ARB 0x8874
+#define GL_PROGRAM_FORMAT_ASCII_ARB 0x8875
+#define GL_PROGRAM_FORMAT_ARB 0x8876
+#define GL_PROGRAM_INSTRUCTIONS_ARB 0x88A0
+#define GL_MAX_PROGRAM_INSTRUCTIONS_ARB 0x88A1
+#define GL_PROGRAM_NATIVE_INSTRUCTIONS_ARB 0x88A2
+#define GL_MAX_PROGRAM_NATIVE_INSTRUCTIONS_ARB 0x88A3
+#define GL_PROGRAM_TEMPORARIES_ARB 0x88A4
+#define GL_MAX_PROGRAM_TEMPORARIES_ARB 0x88A5
+#define GL_PROGRAM_NATIVE_TEMPORARIES_ARB 0x88A6
+#define GL_MAX_PROGRAM_NATIVE_TEMPORARIES_ARB 0x88A7
+#define GL_PROGRAM_PARAMETERS_ARB 0x88A8
+#define GL_MAX_PROGRAM_PARAMETERS_ARB 0x88A9
+#define GL_PROGRAM_NATIVE_PARAMETERS_ARB 0x88AA
+#define GL_MAX_PROGRAM_NATIVE_PARAMETERS_ARB 0x88AB
+#define GL_PROGRAM_ATTRIBS_ARB 0x88AC
+#define GL_MAX_PROGRAM_ATTRIBS_ARB 0x88AD
+#define GL_PROGRAM_NATIVE_ATTRIBS_ARB 0x88AE
+#define GL_MAX_PROGRAM_NATIVE_ATTRIBS_ARB 0x88AF
+#define GL_PROGRAM_ADDRESS_REGISTERS_ARB 0x88B0
+#define GL_MAX_PROGRAM_ADDRESS_REGISTERS_ARB 0x88B1
+#define GL_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB 0x88B2
+#define GL_MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB 0x88B3
+#define GL_MAX_PROGRAM_LOCAL_PARAMETERS_ARB 0x88B4
+#define GL_MAX_PROGRAM_ENV_PARAMETERS_ARB 0x88B5
+#define GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB 0x88B6
+#define GL_TRANSPOSE_CURRENT_MATRIX_ARB 0x88B7
+#define GL_MATRIX0_ARB 0x88C0
+#define GL_MATRIX1_ARB 0x88C1
+#define GL_MATRIX2_ARB 0x88C2
+#define GL_MATRIX3_ARB 0x88C3
+#define GL_MATRIX4_ARB 0x88C4
+#define GL_MATRIX5_ARB 0x88C5
+#define GL_MATRIX6_ARB 0x88C6
+#define GL_MATRIX7_ARB 0x88C7
+#define GL_MATRIX8_ARB 0x88C8
+#define GL_MATRIX9_ARB 0x88C9
+#define GL_MATRIX10_ARB 0x88CA
+#define GL_MATRIX11_ARB 0x88CB
+#define GL_MATRIX12_ARB 0x88CC
+#define GL_MATRIX13_ARB 0x88CD
+#define GL_MATRIX14_ARB 0x88CE
+#define GL_MATRIX15_ARB 0x88CF
+#define GL_MATRIX16_ARB 0x88D0
+#define GL_MATRIX17_ARB 0x88D1
+#define GL_MATRIX18_ARB 0x88D2
+#define GL_MATRIX19_ARB 0x88D3
+#define GL_MATRIX20_ARB 0x88D4
+#define GL_MATRIX21_ARB 0x88D5
+#define GL_MATRIX22_ARB 0x88D6
+#define GL_MATRIX23_ARB 0x88D7
+#define GL_MATRIX24_ARB 0x88D8
+#define GL_MATRIX25_ARB 0x88D9
+#define GL_MATRIX26_ARB 0x88DA
+#define GL_MATRIX27_ARB 0x88DB
+#define GL_MATRIX28_ARB 0x88DC
+#define GL_MATRIX29_ARB 0x88DD
+#define GL_MATRIX30_ARB 0x88DE
+#define GL_MATRIX31_ARB 0x88DF
+#endif
+
+#ifndef GL_ARB_fragment_program
+#define GL_FRAGMENT_PROGRAM_ARB 0x8804
+#define GL_PROGRAM_ALU_INSTRUCTIONS_ARB 0x8805
+#define GL_PROGRAM_TEX_INSTRUCTIONS_ARB 0x8806
+#define GL_PROGRAM_TEX_INDIRECTIONS_ARB 0x8807
+#define GL_PROGRAM_NATIVE_ALU_INSTRUCTIONS_ARB 0x8808
+#define GL_PROGRAM_NATIVE_TEX_INSTRUCTIONS_ARB 0x8809
+#define GL_PROGRAM_NATIVE_TEX_INDIRECTIONS_ARB 0x880A
+#define GL_MAX_PROGRAM_ALU_INSTRUCTIONS_ARB 0x880B
+#define GL_MAX_PROGRAM_TEX_INSTRUCTIONS_ARB 0x880C
+#define GL_MAX_PROGRAM_TEX_INDIRECTIONS_ARB 0x880D
+#define GL_MAX_PROGRAM_NATIVE_ALU_INSTRUCTIONS_ARB 0x880E
+#define GL_MAX_PROGRAM_NATIVE_TEX_INSTRUCTIONS_ARB 0x880F
+#define GL_MAX_PROGRAM_NATIVE_TEX_INDIRECTIONS_ARB 0x8810
+#define GL_MAX_TEXTURE_COORDS_ARB 0x8871
+#define GL_MAX_TEXTURE_IMAGE_UNITS_ARB 0x8872
+#endif
+
+#ifndef GL_ARB_vertex_buffer_object
+#define GL_BUFFER_SIZE_ARB 0x8764
+#define GL_BUFFER_USAGE_ARB 0x8765
+#define GL_ARRAY_BUFFER_ARB 0x8892
+#define GL_ELEMENT_ARRAY_BUFFER_ARB 0x8893
+#define GL_ARRAY_BUFFER_BINDING_ARB 0x8894
+#define GL_ELEMENT_ARRAY_BUFFER_BINDING_ARB 0x8895
+#define GL_VERTEX_ARRAY_BUFFER_BINDING_ARB 0x8896
+#define GL_NORMAL_ARRAY_BUFFER_BINDING_ARB 0x8897
+#define GL_COLOR_ARRAY_BUFFER_BINDING_ARB 0x8898
+#define GL_INDEX_ARRAY_BUFFER_BINDING_ARB 0x8899
+#define GL_TEXTURE_COORD_ARRAY_BUFFER_BINDING_ARB 0x889A
+#define GL_EDGE_FLAG_ARRAY_BUFFER_BINDING_ARB 0x889B
+#define GL_SECONDARY_COLOR_ARRAY_BUFFER_BINDING_ARB 0x889C
+#define GL_FOG_COORDINATE_ARRAY_BUFFER_BINDING_ARB 0x889D
+#define GL_WEIGHT_ARRAY_BUFFER_BINDING_ARB 0x889E
+#define GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING_ARB 0x889F
+#define GL_READ_ONLY_ARB 0x88B8
+#define GL_WRITE_ONLY_ARB 0x88B9
+#define GL_READ_WRITE_ARB 0x88BA
+#define GL_BUFFER_ACCESS_ARB 0x88BB
+#define GL_BUFFER_MAPPED_ARB 0x88BC
+#define GL_BUFFER_MAP_POINTER_ARB 0x88BD
+#define GL_STREAM_DRAW_ARB 0x88E0
+#define GL_STREAM_READ_ARB 0x88E1
+#define GL_STREAM_COPY_ARB 0x88E2
+#define GL_STATIC_DRAW_ARB 0x88E4
+#define GL_STATIC_READ_ARB 0x88E5
+#define GL_STATIC_COPY_ARB 0x88E6
+#define GL_DYNAMIC_DRAW_ARB 0x88E8
+#define GL_DYNAMIC_READ_ARB 0x88E9
+#define GL_DYNAMIC_COPY_ARB 0x88EA
+#endif
+
+#ifndef GL_ARB_occlusion_query
+#define GL_QUERY_COUNTER_BITS_ARB 0x8864
+#define GL_CURRENT_QUERY_ARB 0x8865
+#define GL_QUERY_RESULT_ARB 0x8866
+#define GL_QUERY_RESULT_AVAILABLE_ARB 0x8867
+#define GL_SAMPLES_PASSED_ARB 0x8914
+#endif
+
+#ifndef GL_ARB_shader_objects
+#define GL_PROGRAM_ARB 0x8B40
+#define GL_SHADER_ARB 0x8B48
+#define GL_TYPE_ARB 0x8B4E
+#define GL_SUBTYPE_ARB 0x8B4F
+#define GL_FLOAT_VEC2_ARB 0x8B50
+#define GL_FLOAT_VEC3_ARB 0x8B51
+#define GL_FLOAT_VEC4_ARB 0x8B52
+#define GL_INT_VEC2_ARB 0x8B53
+#define GL_INT_VEC3_ARB 0x8B54
+#define GL_INT_VEC4_ARB 0x8B55
+#define GL_BOOL_ARB 0x8B56
+#define GL_BOOL_VEC2_ARB 0x8B57
+#define GL_BOOL_VEC3_ARB 0x8B58
+#define GL_BOOL_VEC4_ARB 0x8B59
+#define GL_FLOAT_MAT2_ARB 0x8B5A
+#define GL_FLOAT_MAT3_ARB 0x8B5B
+#define GL_FLOAT_MAT4_ARB 0x8B5C
+#define GL_SAMPLER_1D_ARB 0x8B5D
+#define GL_SAMPLER_2D_ARB 0x8B5E
+#define GL_SAMPLER_3D_ARB 0x8B5F
+#define GL_SAMPLER_CUBE_ARB 0x8B60
+#define GL_SAMPLER_1D_SHADOW_ARB 0x8B61
+#define GL_SAMPLER_2D_SHADOW_ARB 0x8B62
+#define GL_SAMPLER_2D_RECT_ARB 0x8B63
+#define GL_SAMPLER_2D_RECT_SHADOW_ARB 0x8B64
+#define GL_DELETE_STATUS_ARB 0x8B80
+#define GL_COMPILE_STATUS_ARB 0x8B81
+#define GL_LINK_STATUS_ARB 0x8B82
+#define GL_VALIDATE_STATUS_ARB 0x8B83
+#define GL_INFO_LOG_LENGTH_ARB 0x8B84
+#define GL_ATTACHEDS_ARB 0x8B85
+#define GL_ACTIVE_UNIFORMS_ARB 0x8B86
+#define GL_ACTIVE_UNIFORM_MAX_LENGTH_ARB 0x8B87
+#define GL_SHADER_SOURCE_LENGTH_ARB 0x8B88
+#endif
+
+#ifndef GL_ARB_vertex_shader
+#define GL_VERTEX_SHADER_ARB 0x8B31
+#define GL_MAX_VERTEX_UNIFORM_COMPONENTS_ARB 0x8B4A
+#define GL_MAX_VARYING_FLOATS_ARB 0x8B4B
+#define GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS_ARB 0x8B4C
+#define GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS_ARB 0x8B4D
+#define GL_ACTIVE_ATTRIBUTES_ARB 0x8B89
+#define GL_ACTIVE_ATTRIBUTE_MAX_LENGTH_ARB 0x8B8A
+#endif
+
+#ifndef GL_ARB_fragment_shader
+#define GL_FRAGMENT_SHADER_ARB 0x8B30
+#define GL_MAX_FRAGMENT_UNIFORM_COMPONENTS_ARB 0x8B49
+#define GL_FRAGMENT_SHADER_DERIVATIVE_HINT_ARB 0x8B8B
+#endif
+
+#ifndef GL_ARB_shading_language_100
+#define GL_SHADING_LANGUAGE_VERSION_ARB 0x8B8C
+#endif
+
+#ifndef GL_ARB_texture_non_power_of_two
+#endif
+
+#ifndef GL_ARB_point_sprite
+#define GL_POINT_SPRITE_ARB 0x8861
+#define GL_COORD_REPLACE_ARB 0x8862
+#endif
+
+#ifndef GL_ARB_fragment_program_shadow
+#endif
+
+#ifndef GL_ARB_draw_buffers
+#define GL_MAX_DRAW_BUFFERS_ARB 0x8824
+#define GL_DRAW_BUFFER0_ARB 0x8825
+#define GL_DRAW_BUFFER1_ARB 0x8826
+#define GL_DRAW_BUFFER2_ARB 0x8827
+#define GL_DRAW_BUFFER3_ARB 0x8828
+#define GL_DRAW_BUFFER4_ARB 0x8829
+#define GL_DRAW_BUFFER5_ARB 0x882A
+#define GL_DRAW_BUFFER6_ARB 0x882B
+#define GL_DRAW_BUFFER7_ARB 0x882C
+#define GL_DRAW_BUFFER8_ARB 0x882D
+#define GL_DRAW_BUFFER9_ARB 0x882E
+#define GL_DRAW_BUFFER10_ARB 0x882F
+#define GL_DRAW_BUFFER11_ARB 0x8830
+#define GL_DRAW_BUFFER12_ARB 0x8831
+#define GL_DRAW_BUFFER13_ARB 0x8832
+#define GL_DRAW_BUFFER14_ARB 0x8833
+#define GL_DRAW_BUFFER15_ARB 0x8834
+#endif
+
+#ifndef GL_ARB_texture_rectangle
+#define GL_TEXTURE_RECTANGLE_ARB 0x84F5
+#define GL_TEXTURE_BINDING_RECTANGLE_ARB 0x84F6
+#define GL_PROXY_TEXTURE_RECTANGLE_ARB 0x84F7
+#define GL_MAX_RECTANGLE_TEXTURE_SIZE_ARB 0x84F8
+#endif
+
+#ifndef GL_ARB_color_buffer_float
+#define GL_RGBA_FLOAT_MODE_ARB 0x8820
+#define GL_CLAMP_VERTEX_COLOR_ARB 0x891A
+#define GL_CLAMP_FRAGMENT_COLOR_ARB 0x891B
+#define GL_CLAMP_READ_COLOR_ARB 0x891C
+#define GL_FIXED_ONLY_ARB 0x891D
+#endif
+
+#ifndef GL_ARB_half_float_pixel
+#define GL_HALF_FLOAT_ARB 0x140B
+#endif
+
+#ifndef GL_ARB_texture_float
+#define GL_TEXTURE_RED_TYPE_ARB 0x8C10
+#define GL_TEXTURE_GREEN_TYPE_ARB 0x8C11
+#define GL_TEXTURE_BLUE_TYPE_ARB 0x8C12
+#define GL_TEXTURE_ALPHA_TYPE_ARB 0x8C13
+#define GL_TEXTURE_LUMINANCE_TYPE_ARB 0x8C14
+#define GL_TEXTURE_INTENSITY_TYPE_ARB 0x8C15
+#define GL_TEXTURE_DEPTH_TYPE_ARB 0x8C16
+#define GL_UNSIGNED_NORMALIZED_ARB 0x8C17
+#define GL_RGBA32F_ARB 0x8814
+#define GL_RGB32F_ARB 0x8815
+#define GL_ALPHA32F_ARB 0x8816
+#define GL_INTENSITY32F_ARB 0x8817
+#define GL_LUMINANCE32F_ARB 0x8818
+#define GL_LUMINANCE_ALPHA32F_ARB 0x8819
+#define GL_RGBA16F_ARB 0x881A
+#define GL_RGB16F_ARB 0x881B
+#define GL_ALPHA16F_ARB 0x881C
+#define GL_INTENSITY16F_ARB 0x881D
+#define GL_LUMINANCE16F_ARB 0x881E
+#define GL_LUMINANCE_ALPHA16F_ARB 0x881F
+#endif
+
+#ifndef GL_ARB_pixel_buffer_object
+#define GL_PIXEL_PACK_BUFFER_ARB 0x88EB
+#define GL_PIXEL_UNPACK_BUFFER_ARB 0x88EC
+#define GL_PIXEL_PACK_BUFFER_BINDING_ARB 0x88ED
+#define GL_PIXEL_UNPACK_BUFFER_BINDING_ARB 0x88EF
+#endif
+
+#ifndef GL_ARB_depth_buffer_float
+#define GL_DEPTH_COMPONENT32F 0x8CAC
+#define GL_DEPTH32F_STENCIL8 0x8CAD
+#define GL_FLOAT_32_UNSIGNED_INT_24_8_REV 0x8DAD
+#endif
+
+#ifndef GL_ARB_draw_instanced
+#endif
+
+#ifndef GL_ARB_framebuffer_object
+#define GL_INVALID_FRAMEBUFFER_OPERATION 0x0506
+#define GL_FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING 0x8210
+#define GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE 0x8211
+#define GL_FRAMEBUFFER_ATTACHMENT_RED_SIZE 0x8212
+#define GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE 0x8213
+#define GL_FRAMEBUFFER_ATTACHMENT_BLUE_SIZE 0x8214
+#define GL_FRAMEBUFFER_ATTACHMENT_ALPHA_SIZE 0x8215
+#define GL_FRAMEBUFFER_ATTACHMENT_DEPTH_SIZE 0x8216
+#define GL_FRAMEBUFFER_ATTACHMENT_STENCIL_SIZE 0x8217
+#define GL_FRAMEBUFFER_DEFAULT 0x8218
+#define GL_FRAMEBUFFER_UNDEFINED 0x8219
+#define GL_DEPTH_STENCIL_ATTACHMENT 0x821A
+#define GL_INDEX 0x8222
+#define GL_MAX_RENDERBUFFER_SIZE 0x84E8
+#define GL_DEPTH_STENCIL 0x84F9
+#define GL_UNSIGNED_INT_24_8 0x84FA
+#define GL_DEPTH24_STENCIL8 0x88F0
+#define GL_TEXTURE_STENCIL_SIZE 0x88F1
+#define GL_FRAMEBUFFER_BINDING 0x8CA6
+#define GL_DRAW_FRAMEBUFFER_BINDING GL_FRAMEBUFFER_BINDING
+#define GL_RENDERBUFFER_BINDING 0x8CA7
+#define GL_READ_FRAMEBUFFER 0x8CA8
+#define GL_DRAW_FRAMEBUFFER 0x8CA9
+#define GL_READ_FRAMEBUFFER_BINDING 0x8CAA
+#define GL_RENDERBUFFER_SAMPLES 0x8CAB
+#define GL_FRAMEBUFFER_ATTACHMENT_TYPE 0x8CD0
+#define GL_FRAMEBUFFER_ATTACHMENT_NAME 0x8CD1
+#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL 0x8CD2
+#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE 0x8CD3
+#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LAYER 0x8CD4
+#define GL_FRAMEBUFFER_COMPLETE 0x8CD5
+#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT 0x8CD6
+#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT 0x8CD7
+#define GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER 0x8CDB
+#define GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER 0x8CDC
+#define GL_FRAMEBUFFER_UNSUPPORTED 0x8CDD
+#define GL_MAX_COLOR_ATTACHMENTS 0x8CDF
+#define GL_COLOR_ATTACHMENT0 0x8CE0
+#define GL_COLOR_ATTACHMENT1 0x8CE1
+#define GL_COLOR_ATTACHMENT2 0x8CE2
+#define GL_COLOR_ATTACHMENT3 0x8CE3
+#define GL_COLOR_ATTACHMENT4 0x8CE4
+#define GL_COLOR_ATTACHMENT5 0x8CE5
+#define GL_COLOR_ATTACHMENT6 0x8CE6
+#define GL_COLOR_ATTACHMENT7 0x8CE7
+#define GL_COLOR_ATTACHMENT8 0x8CE8
+#define GL_COLOR_ATTACHMENT9 0x8CE9
+#define GL_COLOR_ATTACHMENT10 0x8CEA
+#define GL_COLOR_ATTACHMENT11 0x8CEB
+#define GL_COLOR_ATTACHMENT12 0x8CEC
+#define GL_COLOR_ATTACHMENT13 0x8CED
+#define GL_COLOR_ATTACHMENT14 0x8CEE
+#define GL_COLOR_ATTACHMENT15 0x8CEF
+#define GL_DEPTH_ATTACHMENT 0x8D00
+#define GL_STENCIL_ATTACHMENT 0x8D20
+#define GL_FRAMEBUFFER 0x8D40
+#define GL_RENDERBUFFER 0x8D41
+#define GL_RENDERBUFFER_WIDTH 0x8D42
+#define GL_RENDERBUFFER_HEIGHT 0x8D43
+#define GL_RENDERBUFFER_INTERNAL_FORMAT 0x8D44
+#define GL_STENCIL_INDEX1 0x8D46
+#define GL_STENCIL_INDEX4 0x8D47
+#define GL_STENCIL_INDEX8 0x8D48
+#define GL_STENCIL_INDEX16 0x8D49
+#define GL_RENDERBUFFER_RED_SIZE 0x8D50
+#define GL_RENDERBUFFER_GREEN_SIZE 0x8D51
+#define GL_RENDERBUFFER_BLUE_SIZE 0x8D52
+#define GL_RENDERBUFFER_ALPHA_SIZE 0x8D53
+#define GL_RENDERBUFFER_DEPTH_SIZE 0x8D54
+#define GL_RENDERBUFFER_STENCIL_SIZE 0x8D55
+#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE 0x8D56
+#define GL_MAX_SAMPLES 0x8D57
+#endif
+
+#ifndef GL_ARB_framebuffer_sRGB
+#define GL_FRAMEBUFFER_SRGB 0x8DB9
+#endif
+
+#ifndef GL_ARB_geometry_shader4
+#define GL_LINES_ADJACENCY_ARB 0x000A
+#define GL_LINE_STRIP_ADJACENCY_ARB 0x000B
+#define GL_TRIANGLES_ADJACENCY_ARB 0x000C
+#define GL_TRIANGLE_STRIP_ADJACENCY_ARB 0x000D
+#define GL_PROGRAM_POINT_SIZE_ARB 0x8642
+#define GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS_ARB 0x8C29
+#define GL_FRAMEBUFFER_ATTACHMENT_LAYERED_ARB 0x8DA7
+#define GL_FRAMEBUFFER_INCOMPLETE_LAYER_TARGETS_ARB 0x8DA8
+#define GL_FRAMEBUFFER_INCOMPLETE_LAYER_COUNT_ARB 0x8DA9
+#define GL_GEOMETRY_SHADER_ARB 0x8DD9
+#define GL_GEOMETRY_VERTICES_OUT_ARB 0x8DDA
+#define GL_GEOMETRY_INPUT_TYPE_ARB 0x8DDB
+#define GL_GEOMETRY_OUTPUT_TYPE_ARB 0x8DDC
+#define GL_MAX_GEOMETRY_VARYING_COMPONENTS_ARB 0x8DDD
+#define GL_MAX_VERTEX_VARYING_COMPONENTS_ARB 0x8DDE
+#define GL_MAX_GEOMETRY_UNIFORM_COMPONENTS_ARB 0x8DDF
+#define GL_MAX_GEOMETRY_OUTPUT_VERTICES_ARB 0x8DE0
+#define GL_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS_ARB 0x8DE1
+/* reuse GL_MAX_VARYING_COMPONENTS */
+/* reuse GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LAYER */
+#endif
+
+#ifndef GL_ARB_half_float_vertex
+#define GL_HALF_FLOAT 0x140B
+#endif
+
+#ifndef GL_ARB_instanced_arrays
+#define GL_VERTEX_ATTRIB_ARRAY_DIVISOR_ARB 0x88FE
+#endif
+
+#ifndef GL_ARB_map_buffer_range
+#define GL_MAP_READ_BIT 0x0001
+#define GL_MAP_WRITE_BIT 0x0002
+#define GL_MAP_INVALIDATE_RANGE_BIT 0x0004
+#define GL_MAP_INVALIDATE_BUFFER_BIT 0x0008
+#define GL_MAP_FLUSH_EXPLICIT_BIT 0x0010
+#define GL_MAP_UNSYNCHRONIZED_BIT 0x0020
+#endif
+
+#ifndef GL_ARB_texture_buffer_object
+#define GL_TEXTURE_BUFFER_ARB 0x8C2A
+#define GL_MAX_TEXTURE_BUFFER_SIZE_ARB 0x8C2B
+#define GL_TEXTURE_BINDING_BUFFER_ARB 0x8C2C
+#define GL_TEXTURE_BUFFER_DATA_STORE_BINDING_ARB 0x8C2D
+#define GL_TEXTURE_BUFFER_FORMAT_ARB 0x8C2E
+#endif
+
+#ifndef GL_ARB_texture_compression_rgtc
+#define GL_COMPRESSED_RED_RGTC1 0x8DBB
+#define GL_COMPRESSED_SIGNED_RED_RGTC1 0x8DBC
+#define GL_COMPRESSED_RG_RGTC2 0x8DBD
+#define GL_COMPRESSED_SIGNED_RG_RGTC2 0x8DBE
+#endif
+
+#ifndef GL_ARB_texture_rg
+#define GL_RG 0x8227
+#define GL_RG_INTEGER 0x8228
+#define GL_R8 0x8229
+#define GL_R16 0x822A
+#define GL_RG8 0x822B
+#define GL_RG16 0x822C
+#define GL_R16F 0x822D
+#define GL_R32F 0x822E
+#define GL_RG16F 0x822F
+#define GL_RG32F 0x8230
+#define GL_R8I 0x8231
+#define GL_R8UI 0x8232
+#define GL_R16I 0x8233
+#define GL_R16UI 0x8234
+#define GL_R32I 0x8235
+#define GL_R32UI 0x8236
+#define GL_RG8I 0x8237
+#define GL_RG8UI 0x8238
+#define GL_RG16I 0x8239
+#define GL_RG16UI 0x823A
+#define GL_RG32I 0x823B
+#define GL_RG32UI 0x823C
+#endif
+
+#ifndef GL_ARB_vertex_array_object
+#define GL_VERTEX_ARRAY_BINDING 0x85B5
+#endif
+
+#ifndef GL_ARB_uniform_buffer_object
+#define GL_UNIFORM_BUFFER 0x8A11
+#define GL_UNIFORM_BUFFER_BINDING 0x8A28
+#define GL_UNIFORM_BUFFER_START 0x8A29
+#define GL_UNIFORM_BUFFER_SIZE 0x8A2A
+#define GL_MAX_VERTEX_UNIFORM_BLOCKS 0x8A2B
+#define GL_MAX_GEOMETRY_UNIFORM_BLOCKS 0x8A2C
+#define GL_MAX_FRAGMENT_UNIFORM_BLOCKS 0x8A2D
+#define GL_MAX_COMBINED_UNIFORM_BLOCKS 0x8A2E
+#define GL_MAX_UNIFORM_BUFFER_BINDINGS 0x8A2F
+#define GL_MAX_UNIFORM_BLOCK_SIZE 0x8A30
+#define GL_MAX_COMBINED_VERTEX_UNIFORM_COMPONENTS 0x8A31
+#define GL_MAX_COMBINED_GEOMETRY_UNIFORM_COMPONENTS 0x8A32
+#define GL_MAX_COMBINED_FRAGMENT_UNIFORM_COMPONENTS 0x8A33
+#define GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT 0x8A34
+#define GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH 0x8A35
+#define GL_ACTIVE_UNIFORM_BLOCKS 0x8A36
+#define GL_UNIFORM_TYPE 0x8A37
+#define GL_UNIFORM_SIZE 0x8A38
+#define GL_UNIFORM_NAME_LENGTH 0x8A39
+#define GL_UNIFORM_BLOCK_INDEX 0x8A3A
+#define GL_UNIFORM_OFFSET 0x8A3B
+#define GL_UNIFORM_ARRAY_STRIDE 0x8A3C
+#define GL_UNIFORM_MATRIX_STRIDE 0x8A3D
+#define GL_UNIFORM_IS_ROW_MAJOR 0x8A3E
+#define GL_UNIFORM_BLOCK_BINDING 0x8A3F
+#define GL_UNIFORM_BLOCK_DATA_SIZE 0x8A40
+#define GL_UNIFORM_BLOCK_NAME_LENGTH 0x8A41
+#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORMS 0x8A42
+#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORM_INDICES 0x8A43
+#define GL_UNIFORM_BLOCK_REFERENCED_BY_VERTEX_SHADER 0x8A44
+#define GL_UNIFORM_BLOCK_REFERENCED_BY_GEOMETRY_SHADER 0x8A45
+#define GL_UNIFORM_BLOCK_REFERENCED_BY_FRAGMENT_SHADER 0x8A46
+#define GL_INVALID_INDEX_ARB 0xFFFFFFFFu
+#endif
+
+#ifndef GL_ARB_compatibility
+/* ARB_compatibility just defines tokens from core 3.0 */
+#endif
+
+#ifndef GL_ARB_copy_buffer
+#define GL_COPY_READ_BUFFER 0x8F36
+#define GL_COPY_WRITE_BUFFER 0x8F37
+#endif
+
+#ifndef GL_EXT_abgr
+#define GL_ABGR_EXT 0x8000
+#endif
+
+#ifndef GL_EXT_blend_color
+#define GL_CONSTANT_COLOR_EXT 0x8001
+#define GL_ONE_MINUS_CONSTANT_COLOR_EXT 0x8002
+#define GL_CONSTANT_ALPHA_EXT 0x8003
+#define GL_ONE_MINUS_CONSTANT_ALPHA_EXT 0x8004
+#define GL_BLEND_COLOR_EXT 0x8005
+#endif
+
+#ifndef GL_EXT_polygon_offset
+#define GL_POLYGON_OFFSET_EXT 0x8037
+#define GL_POLYGON_OFFSET_FACTOR_EXT 0x8038
+#define GL_POLYGON_OFFSET_BIAS_EXT 0x8039
+#endif
+
+#ifndef GL_EXT_texture
+#define GL_ALPHA4_EXT 0x803B
+#define GL_ALPHA8_EXT 0x803C
+#define GL_ALPHA12_EXT 0x803D
+#define GL_ALPHA16_EXT 0x803E
+#define GL_LUMINANCE4_EXT 0x803F
+#define GL_LUMINANCE8_EXT 0x8040
+#define GL_LUMINANCE12_EXT 0x8041
+#define GL_LUMINANCE16_EXT 0x8042
+#define GL_LUMINANCE4_ALPHA4_EXT 0x8043
+#define GL_LUMINANCE6_ALPHA2_EXT 0x8044
+#define GL_LUMINANCE8_ALPHA8_EXT 0x8045
+#define GL_LUMINANCE12_ALPHA4_EXT 0x8046
+#define GL_LUMINANCE12_ALPHA12_EXT 0x8047
+#define GL_LUMINANCE16_ALPHA16_EXT 0x8048
+#define GL_INTENSITY_EXT 0x8049
+#define GL_INTENSITY4_EXT 0x804A
+#define GL_INTENSITY8_EXT 0x804B
+#define GL_INTENSITY12_EXT 0x804C
+#define GL_INTENSITY16_EXT 0x804D
+#define GL_RGB2_EXT 0x804E
+#define GL_RGB4_EXT 0x804F
+#define GL_RGB5_EXT 0x8050
+#define GL_RGB8_EXT 0x8051
+#define GL_RGB10_EXT 0x8052
+#define GL_RGB12_EXT 0x8053
+#define GL_RGB16_EXT 0x8054
+#define GL_RGBA2_EXT 0x8055
+#define GL_RGBA4_EXT 0x8056
+#define GL_RGB5_A1_EXT 0x8057
+#define GL_RGBA8_EXT 0x8058
+#define GL_RGB10_A2_EXT 0x8059
+#define GL_RGBA12_EXT 0x805A
+#define GL_RGBA16_EXT 0x805B
+#define GL_TEXTURE_RED_SIZE_EXT 0x805C
+#define GL_TEXTURE_GREEN_SIZE_EXT 0x805D
+#define GL_TEXTURE_BLUE_SIZE_EXT 0x805E
+#define GL_TEXTURE_ALPHA_SIZE_EXT 0x805F
+#define GL_TEXTURE_LUMINANCE_SIZE_EXT 0x8060
+#define GL_TEXTURE_INTENSITY_SIZE_EXT 0x8061
+#define GL_REPLACE_EXT 0x8062
+#define GL_PROXY_TEXTURE_1D_EXT 0x8063
+#define GL_PROXY_TEXTURE_2D_EXT 0x8064
+#define GL_TEXTURE_TOO_LARGE_EXT 0x8065
+#endif
+
+#ifndef GL_EXT_texture3D
+#define GL_PACK_SKIP_IMAGES_EXT 0x806B
+#define GL_PACK_IMAGE_HEIGHT_EXT 0x806C
+#define GL_UNPACK_SKIP_IMAGES_EXT 0x806D
+#define GL_UNPACK_IMAGE_HEIGHT_EXT 0x806E
+#define GL_TEXTURE_3D_EXT 0x806F
+#define GL_PROXY_TEXTURE_3D_EXT 0x8070
+#define GL_TEXTURE_DEPTH_EXT 0x8071
+#define GL_TEXTURE_WRAP_R_EXT 0x8072
+#define GL_MAX_3D_TEXTURE_SIZE_EXT 0x8073
+#endif
+
+#ifndef GL_SGIS_texture_filter4
+#define GL_FILTER4_SGIS 0x8146
+#define GL_TEXTURE_FILTER4_SIZE_SGIS 0x8147
+#endif
+
+#ifndef GL_EXT_subtexture
+#endif
+
+#ifndef GL_EXT_copy_texture
+#endif
+
+#ifndef GL_EXT_histogram
+#define GL_HISTOGRAM_EXT 0x8024
+#define GL_PROXY_HISTOGRAM_EXT 0x8025
+#define GL_HISTOGRAM_WIDTH_EXT 0x8026
+#define GL_HISTOGRAM_FORMAT_EXT 0x8027
+#define GL_HISTOGRAM_RED_SIZE_EXT 0x8028
+#define GL_HISTOGRAM_GREEN_SIZE_EXT 0x8029
+#define GL_HISTOGRAM_BLUE_SIZE_EXT 0x802A
+#define GL_HISTOGRAM_ALPHA_SIZE_EXT 0x802B
+#define GL_HISTOGRAM_LUMINANCE_SIZE_EXT 0x802C
+#define GL_HISTOGRAM_SINK_EXT 0x802D
+#define GL_MINMAX_EXT 0x802E
+#define GL_MINMAX_FORMAT_EXT 0x802F
+#define GL_MINMAX_SINK_EXT 0x8030
+#define GL_TABLE_TOO_LARGE_EXT 0x8031
+#endif
+
+#ifndef GL_EXT_convolution
+#define GL_CONVOLUTION_1D_EXT 0x8010
+#define GL_CONVOLUTION_2D_EXT 0x8011
+#define GL_SEPARABLE_2D_EXT 0x8012
+#define GL_CONVOLUTION_BORDER_MODE_EXT 0x8013
+#define GL_CONVOLUTION_FILTER_SCALE_EXT 0x8014
+#define GL_CONVOLUTION_FILTER_BIAS_EXT 0x8015
+#define GL_REDUCE_EXT 0x8016
+#define GL_CONVOLUTION_FORMAT_EXT 0x8017
+#define GL_CONVOLUTION_WIDTH_EXT 0x8018
+#define GL_CONVOLUTION_HEIGHT_EXT 0x8019
+#define GL_MAX_CONVOLUTION_WIDTH_EXT 0x801A
+#define GL_MAX_CONVOLUTION_HEIGHT_EXT 0x801B
+#define GL_POST_CONVOLUTION_RED_SCALE_EXT 0x801C
+#define GL_POST_CONVOLUTION_GREEN_SCALE_EXT 0x801D
+#define GL_POST_CONVOLUTION_BLUE_SCALE_EXT 0x801E
+#define GL_POST_CONVOLUTION_ALPHA_SCALE_EXT 0x801F
+#define GL_POST_CONVOLUTION_RED_BIAS_EXT 0x8020
+#define GL_POST_CONVOLUTION_GREEN_BIAS_EXT 0x8021
+#define GL_POST_CONVOLUTION_BLUE_BIAS_EXT 0x8022
+#define GL_POST_CONVOLUTION_ALPHA_BIAS_EXT 0x8023
+#endif
+
+#ifndef GL_SGI_color_matrix
+#define GL_COLOR_MATRIX_SGI 0x80B1
+#define GL_COLOR_MATRIX_STACK_DEPTH_SGI 0x80B2
+#define GL_MAX_COLOR_MATRIX_STACK_DEPTH_SGI 0x80B3
+#define GL_POST_COLOR_MATRIX_RED_SCALE_SGI 0x80B4
+#define GL_POST_COLOR_MATRIX_GREEN_SCALE_SGI 0x80B5
+#define GL_POST_COLOR_MATRIX_BLUE_SCALE_SGI 0x80B6
+#define GL_POST_COLOR_MATRIX_ALPHA_SCALE_SGI 0x80B7
+#define GL_POST_COLOR_MATRIX_RED_BIAS_SGI 0x80B8
+#define GL_POST_COLOR_MATRIX_GREEN_BIAS_SGI 0x80B9
+#define GL_POST_COLOR_MATRIX_BLUE_BIAS_SGI 0x80BA
+#define GL_POST_COLOR_MATRIX_ALPHA_BIAS_SGI 0x80BB
+#endif
+
+#ifndef GL_SGI_color_table
+#define GL_COLOR_TABLE_SGI 0x80D0
+#define GL_POST_CONVOLUTION_COLOR_TABLE_SGI 0x80D1
+#define GL_POST_COLOR_MATRIX_COLOR_TABLE_SGI 0x80D2
+#define GL_PROXY_COLOR_TABLE_SGI 0x80D3
+#define GL_PROXY_POST_CONVOLUTION_COLOR_TABLE_SGI 0x80D4
+#define GL_PROXY_POST_COLOR_MATRIX_COLOR_TABLE_SGI 0x80D5
+#define GL_COLOR_TABLE_SCALE_SGI 0x80D6
+#define GL_COLOR_TABLE_BIAS_SGI 0x80D7
+#define GL_COLOR_TABLE_FORMAT_SGI 0x80D8
+#define GL_COLOR_TABLE_WIDTH_SGI 0x80D9
+#define GL_COLOR_TABLE_RED_SIZE_SGI 0x80DA
+#define GL_COLOR_TABLE_GREEN_SIZE_SGI 0x80DB
+#define GL_COLOR_TABLE_BLUE_SIZE_SGI 0x80DC
+#define GL_COLOR_TABLE_ALPHA_SIZE_SGI 0x80DD
+#define GL_COLOR_TABLE_LUMINANCE_SIZE_SGI 0x80DE
+#define GL_COLOR_TABLE_INTENSITY_SIZE_SGI 0x80DF
+#endif
+
+#ifndef GL_SGIS_pixel_texture
+#define GL_PIXEL_TEXTURE_SGIS 0x8353
+#define GL_PIXEL_FRAGMENT_RGB_SOURCE_SGIS 0x8354
+#define GL_PIXEL_FRAGMENT_ALPHA_SOURCE_SGIS 0x8355
+#define GL_PIXEL_GROUP_COLOR_SGIS 0x8356
+#endif
+
+#ifndef GL_SGIX_pixel_texture
+#define GL_PIXEL_TEX_GEN_SGIX 0x8139
+#define GL_PIXEL_TEX_GEN_MODE_SGIX 0x832B
+#endif
+
+#ifndef GL_SGIS_texture4D
+#define GL_PACK_SKIP_VOLUMES_SGIS 0x8130
+#define GL_PACK_IMAGE_DEPTH_SGIS 0x8131
+#define GL_UNPACK_SKIP_VOLUMES_SGIS 0x8132
+#define GL_UNPACK_IMAGE_DEPTH_SGIS 0x8133
+#define GL_TEXTURE_4D_SGIS 0x8134
+#define GL_PROXY_TEXTURE_4D_SGIS 0x8135
+#define GL_TEXTURE_4DSIZE_SGIS 0x8136
+#define GL_TEXTURE_WRAP_Q_SGIS 0x8137
+#define GL_MAX_4D_TEXTURE_SIZE_SGIS 0x8138
+#define GL_TEXTURE_4D_BINDING_SGIS 0x814F
+#endif
+
+#ifndef GL_SGI_texture_color_table
+#define GL_TEXTURE_COLOR_TABLE_SGI 0x80BC
+#define GL_PROXY_TEXTURE_COLOR_TABLE_SGI 0x80BD
+#endif
+
+#ifndef GL_EXT_cmyka
+#define GL_CMYK_EXT 0x800C
+#define GL_CMYKA_EXT 0x800D
+#define GL_PACK_CMYK_HINT_EXT 0x800E
+#define GL_UNPACK_CMYK_HINT_EXT 0x800F
+#endif
+
+#ifndef GL_EXT_texture_object
+#define GL_TEXTURE_PRIORITY_EXT 0x8066
+#define GL_TEXTURE_RESIDENT_EXT 0x8067
+#define GL_TEXTURE_1D_BINDING_EXT 0x8068
+#define GL_TEXTURE_2D_BINDING_EXT 0x8069
+#define GL_TEXTURE_3D_BINDING_EXT 0x806A
+#endif
+
+#ifndef GL_SGIS_detail_texture
+#define GL_DETAIL_TEXTURE_2D_SGIS 0x8095
+#define GL_DETAIL_TEXTURE_2D_BINDING_SGIS 0x8096
+#define GL_LINEAR_DETAIL_SGIS 0x8097
+#define GL_LINEAR_DETAIL_ALPHA_SGIS 0x8098
+#define GL_LINEAR_DETAIL_COLOR_SGIS 0x8099
+#define GL_DETAIL_TEXTURE_LEVEL_SGIS 0x809A
+#define GL_DETAIL_TEXTURE_MODE_SGIS 0x809B
+#define GL_DETAIL_TEXTURE_FUNC_POINTS_SGIS 0x809C
+#endif
+
+#ifndef GL_SGIS_sharpen_texture
+#define GL_LINEAR_SHARPEN_SGIS 0x80AD
+#define GL_LINEAR_SHARPEN_ALPHA_SGIS 0x80AE
+#define GL_LINEAR_SHARPEN_COLOR_SGIS 0x80AF
+#define GL_SHARPEN_TEXTURE_FUNC_POINTS_SGIS 0x80B0
+#endif
+
+#ifndef GL_EXT_packed_pixels
+#define GL_UNSIGNED_BYTE_3_3_2_EXT 0x8032
+#define GL_UNSIGNED_SHORT_4_4_4_4_EXT 0x8033
+#define GL_UNSIGNED_SHORT_5_5_5_1_EXT 0x8034
+#define GL_UNSIGNED_INT_8_8_8_8_EXT 0x8035
+#define GL_UNSIGNED_INT_10_10_10_2_EXT 0x8036
+#endif
+
+#ifndef GL_SGIS_texture_lod
+#define GL_TEXTURE_MIN_LOD_SGIS 0x813A
+#define GL_TEXTURE_MAX_LOD_SGIS 0x813B
+#define GL_TEXTURE_BASE_LEVEL_SGIS 0x813C
+#define GL_TEXTURE_MAX_LEVEL_SGIS 0x813D
+#endif
+
+#ifndef GL_SGIS_multisample
+#define GL_MULTISAMPLE_SGIS 0x809D
+#define GL_SAMPLE_ALPHA_TO_MASK_SGIS 0x809E
+#define GL_SAMPLE_ALPHA_TO_ONE_SGIS 0x809F
+#define GL_SAMPLE_MASK_SGIS 0x80A0
+#define GL_1PASS_SGIS 0x80A1
+#define GL_2PASS_0_SGIS 0x80A2
+#define GL_2PASS_1_SGIS 0x80A3
+#define GL_4PASS_0_SGIS 0x80A4
+#define GL_4PASS_1_SGIS 0x80A5
+#define GL_4PASS_2_SGIS 0x80A6
+#define GL_4PASS_3_SGIS 0x80A7
+#define GL_SAMPLE_BUFFERS_SGIS 0x80A8
+#define GL_SAMPLES_SGIS 0x80A9
+#define GL_SAMPLE_MASK_VALUE_SGIS 0x80AA
+#define GL_SAMPLE_MASK_INVERT_SGIS 0x80AB
+#define GL_SAMPLE_PATTERN_SGIS 0x80AC
+#endif
+
+#ifndef GL_EXT_rescale_normal
+#define GL_RESCALE_NORMAL_EXT 0x803A
+#endif
+
+#ifndef GL_EXT_vertex_array
+#define GL_VERTEX_ARRAY_EXT 0x8074
+#define GL_NORMAL_ARRAY_EXT 0x8075
+#define GL_COLOR_ARRAY_EXT 0x8076
+#define GL_INDEX_ARRAY_EXT 0x8077
+#define GL_TEXTURE_COORD_ARRAY_EXT 0x8078
+#define GL_EDGE_FLAG_ARRAY_EXT 0x8079
+#define GL_VERTEX_ARRAY_SIZE_EXT 0x807A
+#define GL_VERTEX_ARRAY_TYPE_EXT 0x807B
+#define GL_VERTEX_ARRAY_STRIDE_EXT 0x807C
+#define GL_VERTEX_ARRAY_COUNT_EXT 0x807D
+#define GL_NORMAL_ARRAY_TYPE_EXT 0x807E
+#define GL_NORMAL_ARRAY_STRIDE_EXT 0x807F
+#define GL_NORMAL_ARRAY_COUNT_EXT 0x8080
+#define GL_COLOR_ARRAY_SIZE_EXT 0x8081
+#define GL_COLOR_ARRAY_TYPE_EXT 0x8082
+#define GL_COLOR_ARRAY_STRIDE_EXT 0x8083
+#define GL_COLOR_ARRAY_COUNT_EXT 0x8084
+#define GL_INDEX_ARRAY_TYPE_EXT 0x8085
+#define GL_INDEX_ARRAY_STRIDE_EXT 0x8086
+#define GL_INDEX_ARRAY_COUNT_EXT 0x8087
+#define GL_TEXTURE_COORD_ARRAY_SIZE_EXT 0x8088
+#define GL_TEXTURE_COORD_ARRAY_TYPE_EXT 0x8089
+#define GL_TEXTURE_COORD_ARRAY_STRIDE_EXT 0x808A
+#define GL_TEXTURE_COORD_ARRAY_COUNT_EXT 0x808B
+#define GL_EDGE_FLAG_ARRAY_STRIDE_EXT 0x808C
+#define GL_EDGE_FLAG_ARRAY_COUNT_EXT 0x808D
+#define GL_VERTEX_ARRAY_POINTER_EXT 0x808E
+#define GL_NORMAL_ARRAY_POINTER_EXT 0x808F
+#define GL_COLOR_ARRAY_POINTER_EXT 0x8090
+#define GL_INDEX_ARRAY_POINTER_EXT 0x8091
+#define GL_TEXTURE_COORD_ARRAY_POINTER_EXT 0x8092
+#define GL_EDGE_FLAG_ARRAY_POINTER_EXT 0x8093
+#endif
+
+#ifndef GL_EXT_misc_attribute
+#endif
+
+#ifndef GL_SGIS_generate_mipmap
+#define GL_GENERATE_MIPMAP_SGIS 0x8191
+#define GL_GENERATE_MIPMAP_HINT_SGIS 0x8192
+#endif
+
+#ifndef GL_SGIX_clipmap
+#define GL_LINEAR_CLIPMAP_LINEAR_SGIX 0x8170
+#define GL_TEXTURE_CLIPMAP_CENTER_SGIX 0x8171
+#define GL_TEXTURE_CLIPMAP_FRAME_SGIX 0x8172
+#define GL_TEXTURE_CLIPMAP_OFFSET_SGIX 0x8173
+#define GL_TEXTURE_CLIPMAP_VIRTUAL_DEPTH_SGIX 0x8174
+#define GL_TEXTURE_CLIPMAP_LOD_OFFSET_SGIX 0x8175
+#define GL_TEXTURE_CLIPMAP_DEPTH_SGIX 0x8176
+#define GL_MAX_CLIPMAP_DEPTH_SGIX 0x8177
+#define GL_MAX_CLIPMAP_VIRTUAL_DEPTH_SGIX 0x8178
+#define GL_NEAREST_CLIPMAP_NEAREST_SGIX 0x844D
+#define GL_NEAREST_CLIPMAP_LINEAR_SGIX 0x844E
+#define GL_LINEAR_CLIPMAP_NEAREST_SGIX 0x844F
+#endif
+
+#ifndef GL_SGIX_shadow
+#define GL_TEXTURE_COMPARE_SGIX 0x819A
+#define GL_TEXTURE_COMPARE_OPERATOR_SGIX 0x819B
+#define GL_TEXTURE_LEQUAL_R_SGIX 0x819C
+#define GL_TEXTURE_GEQUAL_R_SGIX 0x819D
+#endif
+
+#ifndef GL_SGIS_texture_edge_clamp
+#define GL_CLAMP_TO_EDGE_SGIS 0x812F
+#endif
+
+#ifndef GL_SGIS_texture_border_clamp
+#define GL_CLAMP_TO_BORDER_SGIS 0x812D
+#endif
+
+#ifndef GL_EXT_blend_minmax
+#define GL_FUNC_ADD_EXT 0x8006
+#define GL_MIN_EXT 0x8007
+#define GL_MAX_EXT 0x8008
+#define GL_BLEND_EQUATION_EXT 0x8009
+#endif
+
+#ifndef GL_EXT_blend_subtract
+#define GL_FUNC_SUBTRACT_EXT 0x800A
+#define GL_FUNC_REVERSE_SUBTRACT_EXT 0x800B
+#endif
+
+#ifndef GL_EXT_blend_logic_op
+#endif
+
+#ifndef GL_SGIX_interlace
+#define GL_INTERLACE_SGIX 0x8094
+#endif
+
+#ifndef GL_SGIX_pixel_tiles
+#define GL_PIXEL_TILE_BEST_ALIGNMENT_SGIX 0x813E
+#define GL_PIXEL_TILE_CACHE_INCREMENT_SGIX 0x813F
+#define GL_PIXEL_TILE_WIDTH_SGIX 0x8140
+#define GL_PIXEL_TILE_HEIGHT_SGIX 0x8141
+#define GL_PIXEL_TILE_GRID_WIDTH_SGIX 0x8142
+#define GL_PIXEL_TILE_GRID_HEIGHT_SGIX 0x8143
+#define GL_PIXEL_TILE_GRID_DEPTH_SGIX 0x8144
+#define GL_PIXEL_TILE_CACHE_SIZE_SGIX 0x8145
+#endif
+
+#ifndef GL_SGIS_texture_select
+#define GL_DUAL_ALPHA4_SGIS 0x8110
+#define GL_DUAL_ALPHA8_SGIS 0x8111
+#define GL_DUAL_ALPHA12_SGIS 0x8112
+#define GL_DUAL_ALPHA16_SGIS 0x8113
+#define GL_DUAL_LUMINANCE4_SGIS 0x8114
+#define GL_DUAL_LUMINANCE8_SGIS 0x8115
+#define GL_DUAL_LUMINANCE12_SGIS 0x8116
+#define GL_DUAL_LUMINANCE16_SGIS 0x8117
+#define GL_DUAL_INTENSITY4_SGIS 0x8118
+#define GL_DUAL_INTENSITY8_SGIS 0x8119
+#define GL_DUAL_INTENSITY12_SGIS 0x811A
+#define GL_DUAL_INTENSITY16_SGIS 0x811B
+#define GL_DUAL_LUMINANCE_ALPHA4_SGIS 0x811C
+#define GL_DUAL_LUMINANCE_ALPHA8_SGIS 0x811D
+#define GL_QUAD_ALPHA4_SGIS 0x811E
+#define GL_QUAD_ALPHA8_SGIS 0x811F
+#define GL_QUAD_LUMINANCE4_SGIS 0x8120
+#define GL_QUAD_LUMINANCE8_SGIS 0x8121
+#define GL_QUAD_INTENSITY4_SGIS 0x8122
+#define GL_QUAD_INTENSITY8_SGIS 0x8123
+#define GL_DUAL_TEXTURE_SELECT_SGIS 0x8124
+#define GL_QUAD_TEXTURE_SELECT_SGIS 0x8125
+#endif
+
+#ifndef GL_SGIX_sprite
+#define GL_SPRITE_SGIX 0x8148
+#define GL_SPRITE_MODE_SGIX 0x8149
+#define GL_SPRITE_AXIS_SGIX 0x814A
+#define GL_SPRITE_TRANSLATION_SGIX 0x814B
+#define GL_SPRITE_AXIAL_SGIX 0x814C
+#define GL_SPRITE_ALIGNED_SGIX 0x814D
+#define GL_SPRITE_EYE_ALIGNED_SGIX 0x814E
+#endif
+
+#ifndef GL_SGIX_texture_multi_buffer
+#define GL_TEXTURE_MULTI_BUFFER_HINT_SGIX 0x812E
+#endif
+
+#ifndef GL_EXT_point_parameters
+#define GL_POINT_SIZE_MIN_EXT 0x8126
+#define GL_POINT_SIZE_MAX_EXT 0x8127
+#define GL_POINT_FADE_THRESHOLD_SIZE_EXT 0x8128
+#define GL_DISTANCE_ATTENUATION_EXT 0x8129
+#endif
+
+#ifndef GL_SGIS_point_parameters
+#define GL_POINT_SIZE_MIN_SGIS 0x8126
+#define GL_POINT_SIZE_MAX_SGIS 0x8127
+#define GL_POINT_FADE_THRESHOLD_SIZE_SGIS 0x8128
+#define GL_DISTANCE_ATTENUATION_SGIS 0x8129
+#endif
+
+#ifndef GL_SGIX_instruments
+#define GL_INSTRUMENT_BUFFER_POINTER_SGIX 0x8180
+#define GL_INSTRUMENT_MEASUREMENTS_SGIX 0x8181
+#endif
+
+#ifndef GL_SGIX_texture_scale_bias
+#define GL_POST_TEXTURE_FILTER_BIAS_SGIX 0x8179
+#define GL_POST_TEXTURE_FILTER_SCALE_SGIX 0x817A
+#define GL_POST_TEXTURE_FILTER_BIAS_RANGE_SGIX 0x817B
+#define GL_POST_TEXTURE_FILTER_SCALE_RANGE_SGIX 0x817C
+#endif
+
+#ifndef GL_SGIX_framezoom
+#define GL_FRAMEZOOM_SGIX 0x818B
+#define GL_FRAMEZOOM_FACTOR_SGIX 0x818C
+#define GL_MAX_FRAMEZOOM_FACTOR_SGIX 0x818D
+#endif
+
+#ifndef GL_SGIX_tag_sample_buffer
+#endif
+
+#ifndef GL_FfdMaskSGIX
+#define GL_TEXTURE_DEFORMATION_BIT_SGIX 0x00000001
+#define GL_GEOMETRY_DEFORMATION_BIT_SGIX 0x00000002
+#endif
+
+#ifndef GL_SGIX_polynomial_ffd
+#define GL_GEOMETRY_DEFORMATION_SGIX 0x8194
+#define GL_TEXTURE_DEFORMATION_SGIX 0x8195
+#define GL_DEFORMATIONS_MASK_SGIX 0x8196
+#define GL_MAX_DEFORMATION_ORDER_SGIX 0x8197
+#endif
+
+#ifndef GL_SGIX_reference_plane
+#define GL_REFERENCE_PLANE_SGIX 0x817D
+#define GL_REFERENCE_PLANE_EQUATION_SGIX 0x817E
+#endif
+
+#ifndef GL_SGIX_flush_raster
+#endif
+
+#ifndef GL_SGIX_depth_texture
+#define GL_DEPTH_COMPONENT16_SGIX 0x81A5
+#define GL_DEPTH_COMPONENT24_SGIX 0x81A6
+#define GL_DEPTH_COMPONENT32_SGIX 0x81A7
+#endif
+
+#ifndef GL_SGIS_fog_function
+#define GL_FOG_FUNC_SGIS 0x812A
+#define GL_FOG_FUNC_POINTS_SGIS 0x812B
+#define GL_MAX_FOG_FUNC_POINTS_SGIS 0x812C
+#endif
+
+#ifndef GL_SGIX_fog_offset
+#define GL_FOG_OFFSET_SGIX 0x8198
+#define GL_FOG_OFFSET_VALUE_SGIX 0x8199
+#endif
+
+#ifndef GL_HP_image_transform
+#define GL_IMAGE_SCALE_X_HP 0x8155
+#define GL_IMAGE_SCALE_Y_HP 0x8156
+#define GL_IMAGE_TRANSLATE_X_HP 0x8157
+#define GL_IMAGE_TRANSLATE_Y_HP 0x8158
+#define GL_IMAGE_ROTATE_ANGLE_HP 0x8159
+#define GL_IMAGE_ROTATE_ORIGIN_X_HP 0x815A
+#define GL_IMAGE_ROTATE_ORIGIN_Y_HP 0x815B
+#define GL_IMAGE_MAG_FILTER_HP 0x815C
+#define GL_IMAGE_MIN_FILTER_HP 0x815D
+#define GL_IMAGE_CUBIC_WEIGHT_HP 0x815E
+#define GL_CUBIC_HP 0x815F
+#define GL_AVERAGE_HP 0x8160
+#define GL_IMAGE_TRANSFORM_2D_HP 0x8161
+#define GL_POST_IMAGE_TRANSFORM_COLOR_TABLE_HP 0x8162
+#define GL_PROXY_POST_IMAGE_TRANSFORM_COLOR_TABLE_HP 0x8163
+#endif
+
+#ifndef GL_HP_convolution_border_modes
+#define GL_IGNORE_BORDER_HP 0x8150
+#define GL_CONSTANT_BORDER_HP 0x8151
+#define GL_REPLICATE_BORDER_HP 0x8153
+#define GL_CONVOLUTION_BORDER_COLOR_HP 0x8154
+#endif
+
+#ifndef GL_INGR_palette_buffer
+#endif
+
+#ifndef GL_SGIX_texture_add_env
+#define GL_TEXTURE_ENV_BIAS_SGIX 0x80BE
+#endif
+
+#ifndef GL_EXT_color_subtable
+#endif
+
+#ifndef GL_PGI_vertex_hints
+#define GL_VERTEX_DATA_HINT_PGI 0x1A22A
+#define GL_VERTEX_CONSISTENT_HINT_PGI 0x1A22B
+#define GL_MATERIAL_SIDE_HINT_PGI 0x1A22C
+#define GL_MAX_VERTEX_HINT_PGI 0x1A22D
+#define GL_COLOR3_BIT_PGI 0x00010000
+#define GL_COLOR4_BIT_PGI 0x00020000
+#define GL_EDGEFLAG_BIT_PGI 0x00040000
+#define GL_INDEX_BIT_PGI 0x00080000
+#define GL_MAT_AMBIENT_BIT_PGI 0x00100000
+#define GL_MAT_AMBIENT_AND_DIFFUSE_BIT_PGI 0x00200000
+#define GL_MAT_DIFFUSE_BIT_PGI 0x00400000
+#define GL_MAT_EMISSION_BIT_PGI 0x00800000
+#define GL_MAT_COLOR_INDEXES_BIT_PGI 0x01000000
+#define GL_MAT_SHININESS_BIT_PGI 0x02000000
+#define GL_MAT_SPECULAR_BIT_PGI 0x04000000
+#define GL_NORMAL_BIT_PGI 0x08000000
+#define GL_TEXCOORD1_BIT_PGI 0x10000000
+#define GL_TEXCOORD2_BIT_PGI 0x20000000
+#define GL_TEXCOORD3_BIT_PGI 0x40000000
+#define GL_TEXCOORD4_BIT_PGI 0x80000000
+#define GL_VERTEX23_BIT_PGI 0x00000004
+#define GL_VERTEX4_BIT_PGI 0x00000008
+#endif
+
+#ifndef GL_PGI_misc_hints
+#define GL_PREFER_DOUBLEBUFFER_HINT_PGI 0x1A1F8
+#define GL_CONSERVE_MEMORY_HINT_PGI 0x1A1FD
+#define GL_RECLAIM_MEMORY_HINT_PGI 0x1A1FE
+#define GL_NATIVE_GRAPHICS_HANDLE_PGI 0x1A202
+#define GL_NATIVE_GRAPHICS_BEGIN_HINT_PGI 0x1A203
+#define GL_NATIVE_GRAPHICS_END_HINT_PGI 0x1A204
+#define GL_ALWAYS_FAST_HINT_PGI 0x1A20C
+#define GL_ALWAYS_SOFT_HINT_PGI 0x1A20D
+#define GL_ALLOW_DRAW_OBJ_HINT_PGI 0x1A20E
+#define GL_ALLOW_DRAW_WIN_HINT_PGI 0x1A20F
+#define GL_ALLOW_DRAW_FRG_HINT_PGI 0x1A210
+#define GL_ALLOW_DRAW_MEM_HINT_PGI 0x1A211
+#define GL_STRICT_DEPTHFUNC_HINT_PGI 0x1A216
+#define GL_STRICT_LIGHTING_HINT_PGI 0x1A217
+#define GL_STRICT_SCISSOR_HINT_PGI 0x1A218
+#define GL_FULL_STIPPLE_HINT_PGI 0x1A219
+#define GL_CLIP_NEAR_HINT_PGI 0x1A220
+#define GL_CLIP_FAR_HINT_PGI 0x1A221
+#define GL_WIDE_LINE_HINT_PGI 0x1A222
+#define GL_BACK_NORMALS_HINT_PGI 0x1A223
+#endif
+
+#ifndef GL_EXT_paletted_texture
+#define GL_COLOR_INDEX1_EXT 0x80E2
+#define GL_COLOR_INDEX2_EXT 0x80E3
+#define GL_COLOR_INDEX4_EXT 0x80E4
+#define GL_COLOR_INDEX8_EXT 0x80E5
+#define GL_COLOR_INDEX12_EXT 0x80E6
+#define GL_COLOR_INDEX16_EXT 0x80E7
+#define GL_TEXTURE_INDEX_SIZE_EXT 0x80ED
+#endif
+
+#ifndef GL_EXT_clip_volume_hint
+#define GL_CLIP_VOLUME_CLIPPING_HINT_EXT 0x80F0
+#endif
+
+#ifndef GL_SGIX_list_priority
+#define GL_LIST_PRIORITY_SGIX 0x8182
+#endif
+
+#ifndef GL_SGIX_ir_instrument1
+#define GL_IR_INSTRUMENT1_SGIX 0x817F
+#endif
+
+#ifndef GL_SGIX_calligraphic_fragment
+#define GL_CALLIGRAPHIC_FRAGMENT_SGIX 0x8183
+#endif
+
+#ifndef GL_SGIX_texture_lod_bias
+#define GL_TEXTURE_LOD_BIAS_S_SGIX 0x818E
+#define GL_TEXTURE_LOD_BIAS_T_SGIX 0x818F
+#define GL_TEXTURE_LOD_BIAS_R_SGIX 0x8190
+#endif
+
+#ifndef GL_SGIX_shadow_ambient
+#define GL_SHADOW_AMBIENT_SGIX 0x80BF
+#endif
+
+#ifndef GL_EXT_index_texture
+#endif
+
+#ifndef GL_EXT_index_material
+#define GL_INDEX_MATERIAL_EXT 0x81B8
+#define GL_INDEX_MATERIAL_PARAMETER_EXT 0x81B9
+#define GL_INDEX_MATERIAL_FACE_EXT 0x81BA
+#endif
+
+#ifndef GL_EXT_index_func
+#define GL_INDEX_TEST_EXT 0x81B5
+#define GL_INDEX_TEST_FUNC_EXT 0x81B6
+#define GL_INDEX_TEST_REF_EXT 0x81B7
+#endif
+
+#ifndef GL_EXT_index_array_formats
+#define GL_IUI_V2F_EXT 0x81AD
+#define GL_IUI_V3F_EXT 0x81AE
+#define GL_IUI_N3F_V2F_EXT 0x81AF
+#define GL_IUI_N3F_V3F_EXT 0x81B0
+#define GL_T2F_IUI_V2F_EXT 0x81B1
+#define GL_T2F_IUI_V3F_EXT 0x81B2
+#define GL_T2F_IUI_N3F_V2F_EXT 0x81B3
+#define GL_T2F_IUI_N3F_V3F_EXT 0x81B4
+#endif
+
+#ifndef GL_EXT_compiled_vertex_array
+#define GL_ARRAY_ELEMENT_LOCK_FIRST_EXT 0x81A8
+#define GL_ARRAY_ELEMENT_LOCK_COUNT_EXT 0x81A9
+#endif
+
+#ifndef GL_EXT_cull_vertex
+#define GL_CULL_VERTEX_EXT 0x81AA
+#define GL_CULL_VERTEX_EYE_POSITION_EXT 0x81AB
+#define GL_CULL_VERTEX_POSITION_EXT 0x81AC
+#endif
+
+#ifndef GL_SGIX_ycrcb
+#define GL_YCRCB_422_SGIX 0x81BB
+#define GL_YCRCB_444_SGIX 0x81BC
+#endif
+
+#ifndef GL_SGIX_fragment_lighting
+#define GL_FRAGMENT_LIGHTING_SGIX 0x8400
+#define GL_FRAGMENT_COLOR_MATERIAL_SGIX 0x8401
+#define GL_FRAGMENT_COLOR_MATERIAL_FACE_SGIX 0x8402
+#define GL_FRAGMENT_COLOR_MATERIAL_PARAMETER_SGIX 0x8403
+#define GL_MAX_FRAGMENT_LIGHTS_SGIX 0x8404
+#define GL_MAX_ACTIVE_LIGHTS_SGIX 0x8405
+#define GL_CURRENT_RASTER_NORMAL_SGIX 0x8406
+#define GL_LIGHT_ENV_MODE_SGIX 0x8407
+#define GL_FRAGMENT_LIGHT_MODEL_LOCAL_VIEWER_SGIX 0x8408
+#define GL_FRAGMENT_LIGHT_MODEL_TWO_SIDE_SGIX 0x8409
+#define GL_FRAGMENT_LIGHT_MODEL_AMBIENT_SGIX 0x840A
+#define GL_FRAGMENT_LIGHT_MODEL_NORMAL_INTERPOLATION_SGIX 0x840B
+#define GL_FRAGMENT_LIGHT0_SGIX 0x840C
+#define GL_FRAGMENT_LIGHT1_SGIX 0x840D
+#define GL_FRAGMENT_LIGHT2_SGIX 0x840E
+#define GL_FRAGMENT_LIGHT3_SGIX 0x840F
+#define GL_FRAGMENT_LIGHT4_SGIX 0x8410
+#define GL_FRAGMENT_LIGHT5_SGIX 0x8411
+#define GL_FRAGMENT_LIGHT6_SGIX 0x8412
+#define GL_FRAGMENT_LIGHT7_SGIX 0x8413
+#endif
+
+#ifndef GL_IBM_rasterpos_clip
+#define GL_RASTER_POSITION_UNCLIPPED_IBM 0x19262
+#endif
+
+#ifndef GL_HP_texture_lighting
+#define GL_TEXTURE_LIGHTING_MODE_HP 0x8167
+#define GL_TEXTURE_POST_SPECULAR_HP 0x8168
+#define GL_TEXTURE_PRE_SPECULAR_HP 0x8169
+#endif
+
+#ifndef GL_EXT_draw_range_elements
+#define GL_MAX_ELEMENTS_VERTICES_EXT 0x80E8
+#define GL_MAX_ELEMENTS_INDICES_EXT 0x80E9
+#endif
+
+#ifndef GL_WIN_phong_shading
+#define GL_PHONG_WIN 0x80EA
+#define GL_PHONG_HINT_WIN 0x80EB
+#endif
+
+#ifndef GL_WIN_specular_fog
+#define GL_FOG_SPECULAR_TEXTURE_WIN 0x80EC
+#endif
+
+#ifndef GL_EXT_light_texture
+#define GL_FRAGMENT_MATERIAL_EXT 0x8349
+#define GL_FRAGMENT_NORMAL_EXT 0x834A
+#define GL_FRAGMENT_COLOR_EXT 0x834C
+#define GL_ATTENUATION_EXT 0x834D
+#define GL_SHADOW_ATTENUATION_EXT 0x834E
+#define GL_TEXTURE_APPLICATION_MODE_EXT 0x834F
+#define GL_TEXTURE_LIGHT_EXT 0x8350
+#define GL_TEXTURE_MATERIAL_FACE_EXT 0x8351
+#define GL_TEXTURE_MATERIAL_PARAMETER_EXT 0x8352
+/* reuse GL_FRAGMENT_DEPTH_EXT */
+#endif
+
+#ifndef GL_SGIX_blend_alpha_minmax
+#define GL_ALPHA_MIN_SGIX 0x8320
+#define GL_ALPHA_MAX_SGIX 0x8321
+#endif
+
+#ifndef GL_SGIX_impact_pixel_texture
+#define GL_PIXEL_TEX_GEN_Q_CEILING_SGIX 0x8184
+#define GL_PIXEL_TEX_GEN_Q_ROUND_SGIX 0x8185
+#define GL_PIXEL_TEX_GEN_Q_FLOOR_SGIX 0x8186
+#define GL_PIXEL_TEX_GEN_ALPHA_REPLACE_SGIX 0x8187
+#define GL_PIXEL_TEX_GEN_ALPHA_NO_REPLACE_SGIX 0x8188
+#define GL_PIXEL_TEX_GEN_ALPHA_LS_SGIX 0x8189
+#define GL_PIXEL_TEX_GEN_ALPHA_MS_SGIX 0x818A
+#endif
+
+#ifndef GL_EXT_bgra
+#define GL_BGR_EXT 0x80E0
+#define GL_BGRA_EXT 0x80E1
+#endif
+
+#ifndef GL_SGIX_async
+#define GL_ASYNC_MARKER_SGIX 0x8329
+#endif
+
+#ifndef GL_SGIX_async_pixel
+#define GL_ASYNC_TEX_IMAGE_SGIX 0x835C
+#define GL_ASYNC_DRAW_PIXELS_SGIX 0x835D
+#define GL_ASYNC_READ_PIXELS_SGIX 0x835E
+#define GL_MAX_ASYNC_TEX_IMAGE_SGIX 0x835F
+#define GL_MAX_ASYNC_DRAW_PIXELS_SGIX 0x8360
+#define GL_MAX_ASYNC_READ_PIXELS_SGIX 0x8361
+#endif
+
+#ifndef GL_SGIX_async_histogram
+#define GL_ASYNC_HISTOGRAM_SGIX 0x832C
+#define GL_MAX_ASYNC_HISTOGRAM_SGIX 0x832D
+#endif
+
+#ifndef GL_INTEL_texture_scissor
+#endif
+
+#ifndef GL_INTEL_parallel_arrays
+#define GL_PARALLEL_ARRAYS_INTEL 0x83F4
+#define GL_VERTEX_ARRAY_PARALLEL_POINTERS_INTEL 0x83F5
+#define GL_NORMAL_ARRAY_PARALLEL_POINTERS_INTEL 0x83F6
+#define GL_COLOR_ARRAY_PARALLEL_POINTERS_INTEL 0x83F7
+#define GL_TEXTURE_COORD_ARRAY_PARALLEL_POINTERS_INTEL 0x83F8
+#endif
+
+#ifndef GL_HP_occlusion_test
+#define GL_OCCLUSION_TEST_HP 0x8165
+#define GL_OCCLUSION_TEST_RESULT_HP 0x8166
+#endif
+
+#ifndef GL_EXT_pixel_transform
+#define GL_PIXEL_TRANSFORM_2D_EXT 0x8330
+#define GL_PIXEL_MAG_FILTER_EXT 0x8331
+#define GL_PIXEL_MIN_FILTER_EXT 0x8332
+#define GL_PIXEL_CUBIC_WEIGHT_EXT 0x8333
+#define GL_CUBIC_EXT 0x8334
+#define GL_AVERAGE_EXT 0x8335
+#define GL_PIXEL_TRANSFORM_2D_STACK_DEPTH_EXT 0x8336
+#define GL_MAX_PIXEL_TRANSFORM_2D_STACK_DEPTH_EXT 0x8337
+#define GL_PIXEL_TRANSFORM_2D_MATRIX_EXT 0x8338
+#endif
+
+#ifndef GL_EXT_pixel_transform_color_table
+#endif
+
+#ifndef GL_EXT_shared_texture_palette
+#define GL_SHARED_TEXTURE_PALETTE_EXT 0x81FB
+#endif
+
+#ifndef GL_EXT_separate_specular_color
+#define GL_LIGHT_MODEL_COLOR_CONTROL_EXT 0x81F8
+#define GL_SINGLE_COLOR_EXT 0x81F9
+#define GL_SEPARATE_SPECULAR_COLOR_EXT 0x81FA
+#endif
+
+#ifndef GL_EXT_secondary_color
+#define GL_COLOR_SUM_EXT 0x8458
+#define GL_CURRENT_SECONDARY_COLOR_EXT 0x8459
+#define GL_SECONDARY_COLOR_ARRAY_SIZE_EXT 0x845A
+#define GL_SECONDARY_COLOR_ARRAY_TYPE_EXT 0x845B
+#define GL_SECONDARY_COLOR_ARRAY_STRIDE_EXT 0x845C
+#define GL_SECONDARY_COLOR_ARRAY_POINTER_EXT 0x845D
+#define GL_SECONDARY_COLOR_ARRAY_EXT 0x845E
+#endif
+
+#ifndef GL_EXT_texture_perturb_normal
+#define GL_PERTURB_EXT 0x85AE
+#define GL_TEXTURE_NORMAL_EXT 0x85AF
+#endif
+
+#ifndef GL_EXT_multi_draw_arrays
+#endif
+
+#ifndef GL_EXT_fog_coord
+#define GL_FOG_COORDINATE_SOURCE_EXT 0x8450
+#define GL_FOG_COORDINATE_EXT 0x8451
+#define GL_FRAGMENT_DEPTH_EXT 0x8452
+#define GL_CURRENT_FOG_COORDINATE_EXT 0x8453
+#define GL_FOG_COORDINATE_ARRAY_TYPE_EXT 0x8454
+#define GL_FOG_COORDINATE_ARRAY_STRIDE_EXT 0x8455
+#define GL_FOG_COORDINATE_ARRAY_POINTER_EXT 0x8456
+#define GL_FOG_COORDINATE_ARRAY_EXT 0x8457
+#endif
+
+#ifndef GL_REND_screen_coordinates
+#define GL_SCREEN_COORDINATES_REND 0x8490
+#define GL_INVERTED_SCREEN_W_REND 0x8491
+#endif
+
+#ifndef GL_EXT_coordinate_frame
+#define GL_TANGENT_ARRAY_EXT 0x8439
+#define GL_BINORMAL_ARRAY_EXT 0x843A
+#define GL_CURRENT_TANGENT_EXT 0x843B
+#define GL_CURRENT_BINORMAL_EXT 0x843C
+#define GL_TANGENT_ARRAY_TYPE_EXT 0x843E
+#define GL_TANGENT_ARRAY_STRIDE_EXT 0x843F
+#define GL_BINORMAL_ARRAY_TYPE_EXT 0x8440
+#define GL_BINORMAL_ARRAY_STRIDE_EXT 0x8441
+#define GL_TANGENT_ARRAY_POINTER_EXT 0x8442
+#define GL_BINORMAL_ARRAY_POINTER_EXT 0x8443
+#define GL_MAP1_TANGENT_EXT 0x8444
+#define GL_MAP2_TANGENT_EXT 0x8445
+#define GL_MAP1_BINORMAL_EXT 0x8446
+#define GL_MAP2_BINORMAL_EXT 0x8447
+#endif
+
+#ifndef GL_EXT_texture_env_combine
+#define GL_COMBINE_EXT 0x8570
+#define GL_COMBINE_RGB_EXT 0x8571
+#define GL_COMBINE_ALPHA_EXT 0x8572
+#define GL_RGB_SCALE_EXT 0x8573
+#define GL_ADD_SIGNED_EXT 0x8574
+#define GL_INTERPOLATE_EXT 0x8575
+#define GL_CONSTANT_EXT 0x8576
+#define GL_PRIMARY_COLOR_EXT 0x8577
+#define GL_PREVIOUS_EXT 0x8578
+#define GL_SOURCE0_RGB_EXT 0x8580
+#define GL_SOURCE1_RGB_EXT 0x8581
+#define GL_SOURCE2_RGB_EXT 0x8582
+#define GL_SOURCE0_ALPHA_EXT 0x8588
+#define GL_SOURCE1_ALPHA_EXT 0x8589
+#define GL_SOURCE2_ALPHA_EXT 0x858A
+#define GL_OPERAND0_RGB_EXT 0x8590
+#define GL_OPERAND1_RGB_EXT 0x8591
+#define GL_OPERAND2_RGB_EXT 0x8592
+#define GL_OPERAND0_ALPHA_EXT 0x8598
+#define GL_OPERAND1_ALPHA_EXT 0x8599
+#define GL_OPERAND2_ALPHA_EXT 0x859A
+#endif
+
+#ifndef GL_APPLE_specular_vector
+#define GL_LIGHT_MODEL_SPECULAR_VECTOR_APPLE 0x85B0
+#endif
+
+#ifndef GL_APPLE_transform_hint
+#define GL_TRANSFORM_HINT_APPLE 0x85B1
+#endif
+
+#ifndef GL_SGIX_fog_scale
+#define GL_FOG_SCALE_SGIX 0x81FC
+#define GL_FOG_SCALE_VALUE_SGIX 0x81FD
+#endif
+
+#ifndef GL_SUNX_constant_data
+#define GL_UNPACK_CONSTANT_DATA_SUNX 0x81D5
+#define GL_TEXTURE_CONSTANT_DATA_SUNX 0x81D6
+#endif
+
+#ifndef GL_SUN_global_alpha
+#define GL_GLOBAL_ALPHA_SUN 0x81D9
+#define GL_GLOBAL_ALPHA_FACTOR_SUN 0x81DA
+#endif
+
+#ifndef GL_SUN_triangle_list
+#define GL_RESTART_SUN 0x0001
+#define GL_REPLACE_MIDDLE_SUN 0x0002
+#define GL_REPLACE_OLDEST_SUN 0x0003
+#define GL_TRIANGLE_LIST_SUN 0x81D7
+#define GL_REPLACEMENT_CODE_SUN 0x81D8
+#define GL_REPLACEMENT_CODE_ARRAY_SUN 0x85C0
+#define GL_REPLACEMENT_CODE_ARRAY_TYPE_SUN 0x85C1
+#define GL_REPLACEMENT_CODE_ARRAY_STRIDE_SUN 0x85C2
+#define GL_REPLACEMENT_CODE_ARRAY_POINTER_SUN 0x85C3
+#define GL_R1UI_V3F_SUN 0x85C4
+#define GL_R1UI_C4UB_V3F_SUN 0x85C5
+#define GL_R1UI_C3F_V3F_SUN 0x85C6
+#define GL_R1UI_N3F_V3F_SUN 0x85C7
+#define GL_R1UI_C4F_N3F_V3F_SUN 0x85C8
+#define GL_R1UI_T2F_V3F_SUN 0x85C9
+#define GL_R1UI_T2F_N3F_V3F_SUN 0x85CA
+#define GL_R1UI_T2F_C4F_N3F_V3F_SUN 0x85CB
+#endif
+
+#ifndef GL_SUN_vertex
+#endif
+
+#ifndef GL_EXT_blend_func_separate
+#define GL_BLEND_DST_RGB_EXT 0x80C8
+#define GL_BLEND_SRC_RGB_EXT 0x80C9
+#define GL_BLEND_DST_ALPHA_EXT 0x80CA
+#define GL_BLEND_SRC_ALPHA_EXT 0x80CB
+#endif
+
+#ifndef GL_INGR_color_clamp
+#define GL_RED_MIN_CLAMP_INGR 0x8560
+#define GL_GREEN_MIN_CLAMP_INGR 0x8561
+#define GL_BLUE_MIN_CLAMP_INGR 0x8562
+#define GL_ALPHA_MIN_CLAMP_INGR 0x8563
+#define GL_RED_MAX_CLAMP_INGR 0x8564
+#define GL_GREEN_MAX_CLAMP_INGR 0x8565
+#define GL_BLUE_MAX_CLAMP_INGR 0x8566
+#define GL_ALPHA_MAX_CLAMP_INGR 0x8567
+#endif
+
+#ifndef GL_INGR_interlace_read
+#define GL_INTERLACE_READ_INGR 0x8568
+#endif
+
+#ifndef GL_EXT_stencil_wrap
+#define GL_INCR_WRAP_EXT 0x8507
+#define GL_DECR_WRAP_EXT 0x8508
+#endif
+
+#ifndef GL_EXT_422_pixels
+#define GL_422_EXT 0x80CC
+#define GL_422_REV_EXT 0x80CD
+#define GL_422_AVERAGE_EXT 0x80CE
+#define GL_422_REV_AVERAGE_EXT 0x80CF
+#endif
+
+#ifndef GL_NV_texgen_reflection
+#define GL_NORMAL_MAP_NV 0x8511
+#define GL_REFLECTION_MAP_NV 0x8512
+#endif
+
+#ifndef GL_EXT_texture_cube_map
+#define GL_NORMAL_MAP_EXT 0x8511
+#define GL_REFLECTION_MAP_EXT 0x8512
+#define GL_TEXTURE_CUBE_MAP_EXT 0x8513
+#define GL_TEXTURE_BINDING_CUBE_MAP_EXT 0x8514
+#define GL_TEXTURE_CUBE_MAP_POSITIVE_X_EXT 0x8515
+#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X_EXT 0x8516
+#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y_EXT 0x8517
+#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT 0x8518
+#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z_EXT 0x8519
+#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT 0x851A
+#define GL_PROXY_TEXTURE_CUBE_MAP_EXT 0x851B
+#define GL_MAX_CUBE_MAP_TEXTURE_SIZE_EXT 0x851C
+#endif
+
+#ifndef GL_SUN_convolution_border_modes
+#define GL_WRAP_BORDER_SUN 0x81D4
+#endif
+
+#ifndef GL_EXT_texture_env_add
+#endif
+
+#ifndef GL_EXT_texture_lod_bias
+#define GL_MAX_TEXTURE_LOD_BIAS_EXT 0x84FD
+#define GL_TEXTURE_FILTER_CONTROL_EXT 0x8500
+#define GL_TEXTURE_LOD_BIAS_EXT 0x8501
+#endif
+
+#ifndef GL_EXT_texture_filter_anisotropic
+#define GL_TEXTURE_MAX_ANISOTROPY_EXT 0x84FE
+#define GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT 0x84FF
+#endif
+
+#ifndef GL_EXT_vertex_weighting
+#define GL_MODELVIEW0_STACK_DEPTH_EXT GL_MODELVIEW_STACK_DEPTH
+#define GL_MODELVIEW1_STACK_DEPTH_EXT 0x8502
+#define GL_MODELVIEW0_MATRIX_EXT GL_MODELVIEW_MATRIX
+#define GL_MODELVIEW1_MATRIX_EXT 0x8506
+#define GL_VERTEX_WEIGHTING_EXT 0x8509
+#define GL_MODELVIEW0_EXT GL_MODELVIEW
+#define GL_MODELVIEW1_EXT 0x850A
+#define GL_CURRENT_VERTEX_WEIGHT_EXT 0x850B
+#define GL_VERTEX_WEIGHT_ARRAY_EXT 0x850C
+#define GL_VERTEX_WEIGHT_ARRAY_SIZE_EXT 0x850D
+#define GL_VERTEX_WEIGHT_ARRAY_TYPE_EXT 0x850E
+#define GL_VERTEX_WEIGHT_ARRAY_STRIDE_EXT 0x850F
+#define GL_VERTEX_WEIGHT_ARRAY_POINTER_EXT 0x8510
+#endif
+
+#ifndef GL_NV_light_max_exponent
+#define GL_MAX_SHININESS_NV 0x8504
+#define GL_MAX_SPOT_EXPONENT_NV 0x8505
+#endif
+
+#ifndef GL_NV_vertex_array_range
+#define GL_VERTEX_ARRAY_RANGE_NV 0x851D
+#define GL_VERTEX_ARRAY_RANGE_LENGTH_NV 0x851E
+#define GL_VERTEX_ARRAY_RANGE_VALID_NV 0x851F
+#define GL_MAX_VERTEX_ARRAY_RANGE_ELEMENT_NV 0x8520
+#define GL_VERTEX_ARRAY_RANGE_POINTER_NV 0x8521
+#endif
+
+#ifndef GL_NV_register_combiners
+#define GL_REGISTER_COMBINERS_NV 0x8522
+#define GL_VARIABLE_A_NV 0x8523
+#define GL_VARIABLE_B_NV 0x8524
+#define GL_VARIABLE_C_NV 0x8525
+#define GL_VARIABLE_D_NV 0x8526
+#define GL_VARIABLE_E_NV 0x8527
+#define GL_VARIABLE_F_NV 0x8528
+#define GL_VARIABLE_G_NV 0x8529
+#define GL_CONSTANT_COLOR0_NV 0x852A
+#define GL_CONSTANT_COLOR1_NV 0x852B
+#define GL_PRIMARY_COLOR_NV 0x852C
+#define GL_SECONDARY_COLOR_NV 0x852D
+#define GL_SPARE0_NV 0x852E
+#define GL_SPARE1_NV 0x852F
+#define GL_DISCARD_NV 0x8530
+#define GL_E_TIMES_F_NV 0x8531
+#define GL_SPARE0_PLUS_SECONDARY_COLOR_NV 0x8532
+#define GL_UNSIGNED_IDENTITY_NV 0x8536
+#define GL_UNSIGNED_INVERT_NV 0x8537
+#define GL_EXPAND_NORMAL_NV 0x8538
+#define GL_EXPAND_NEGATE_NV 0x8539
+#define GL_HALF_BIAS_NORMAL_NV 0x853A
+#define GL_HALF_BIAS_NEGATE_NV 0x853B
+#define GL_SIGNED_IDENTITY_NV 0x853C
+#define GL_SIGNED_NEGATE_NV 0x853D
+#define GL_SCALE_BY_TWO_NV 0x853E
+#define GL_SCALE_BY_FOUR_NV 0x853F
+#define GL_SCALE_BY_ONE_HALF_NV 0x8540
+#define GL_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x8541
+#define GL_COMBINER_INPUT_NV 0x8542
+#define GL_COMBINER_MAPPING_NV 0x8543
+#define GL_COMBINER_COMPONENT_USAGE_NV 0x8544
+#define GL_COMBINER_AB_DOT_PRODUCT_NV 0x8545
+#define GL_COMBINER_CD_DOT_PRODUCT_NV 0x8546
+#define GL_COMBINER_MUX_SUM_NV 0x8547
+#define GL_COMBINER_SCALE_NV 0x8548
+#define GL_COMBINER_BIAS_NV 0x8549
+#define GL_COMBINER_AB_OUTPUT_NV 0x854A
+#define GL_COMBINER_CD_OUTPUT_NV 0x854B
+#define GL_COMBINER_SUM_OUTPUT_NV 0x854C
+#define GL_MAX_GENERAL_COMBINERS_NV 0x854D
+#define GL_NUM_GENERAL_COMBINERS_NV 0x854E
+#define GL_COLOR_SUM_CLAMP_NV 0x854F
+#define GL_COMBINER0_NV 0x8550
+#define GL_COMBINER1_NV 0x8551
+#define GL_COMBINER2_NV 0x8552
+#define GL_COMBINER3_NV 0x8553
+#define GL_COMBINER4_NV 0x8554
+#define GL_COMBINER5_NV 0x8555
+#define GL_COMBINER6_NV 0x8556
+#define GL_COMBINER7_NV 0x8557
+/* reuse GL_TEXTURE0_ARB */
+/* reuse GL_TEXTURE1_ARB */
+/* reuse GL_ZERO */
+/* reuse GL_NONE */
+/* reuse GL_FOG */
+#endif
+
+#ifndef GL_NV_fog_distance
+#define GL_FOG_DISTANCE_MODE_NV 0x855A
+#define GL_EYE_RADIAL_NV 0x855B
+#define GL_EYE_PLANE_ABSOLUTE_NV 0x855C
+/* reuse GL_EYE_PLANE */
+#endif
+
+#ifndef GL_NV_texgen_emboss
+#define GL_EMBOSS_LIGHT_NV 0x855D
+#define GL_EMBOSS_CONSTANT_NV 0x855E
+#define GL_EMBOSS_MAP_NV 0x855F
+#endif
+
+#ifndef GL_NV_blend_square
+#endif
+
+#ifndef GL_NV_texture_env_combine4
+#define GL_COMBINE4_NV 0x8503
+#define GL_SOURCE3_RGB_NV 0x8583
+#define GL_SOURCE3_ALPHA_NV 0x858B
+#define GL_OPERAND3_RGB_NV 0x8593
+#define GL_OPERAND3_ALPHA_NV 0x859B
+#endif
+
+#ifndef GL_MESA_resize_buffers
+#endif
+
+#ifndef GL_MESA_window_pos
+#endif
+
+#ifndef GL_EXT_texture_compression_s3tc
+#define GL_COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0
+#define GL_COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1
+#define GL_COMPRESSED_RGBA_S3TC_DXT3_EXT 0x83F2
+#define GL_COMPRESSED_RGBA_S3TC_DXT5_EXT 0x83F3
+#endif
+
+#ifndef GL_IBM_cull_vertex
+#define GL_CULL_VERTEX_IBM 103050
+#endif
+
+#ifndef GL_IBM_multimode_draw_arrays
+#endif
+
+#ifndef GL_IBM_vertex_array_lists
+#define GL_VERTEX_ARRAY_LIST_IBM 103070
+#define GL_NORMAL_ARRAY_LIST_IBM 103071
+#define GL_COLOR_ARRAY_LIST_IBM 103072
+#define GL_INDEX_ARRAY_LIST_IBM 103073
+#define GL_TEXTURE_COORD_ARRAY_LIST_IBM 103074
+#define GL_EDGE_FLAG_ARRAY_LIST_IBM 103075
+#define GL_FOG_COORDINATE_ARRAY_LIST_IBM 103076
+#define GL_SECONDARY_COLOR_ARRAY_LIST_IBM 103077
+#define GL_VERTEX_ARRAY_LIST_STRIDE_IBM 103080
+#define GL_NORMAL_ARRAY_LIST_STRIDE_IBM 103081
+#define GL_COLOR_ARRAY_LIST_STRIDE_IBM 103082
+#define GL_INDEX_ARRAY_LIST_STRIDE_IBM 103083
+#define GL_TEXTURE_COORD_ARRAY_LIST_STRIDE_IBM 103084
+#define GL_EDGE_FLAG_ARRAY_LIST_STRIDE_IBM 103085
+#define GL_FOG_COORDINATE_ARRAY_LIST_STRIDE_IBM 103086
+#define GL_SECONDARY_COLOR_ARRAY_LIST_STRIDE_IBM 103087
+#endif
+
+#ifndef GL_SGIX_subsample
+#define GL_PACK_SUBSAMPLE_RATE_SGIX 0x85A0
+#define GL_UNPACK_SUBSAMPLE_RATE_SGIX 0x85A1
+#define GL_PIXEL_SUBSAMPLE_4444_SGIX 0x85A2
+#define GL_PIXEL_SUBSAMPLE_2424_SGIX 0x85A3
+#define GL_PIXEL_SUBSAMPLE_4242_SGIX 0x85A4
+#endif
+
+#ifndef GL_SGIX_ycrcb_subsample
+#endif
+
+#ifndef GL_SGIX_ycrcba
+#define GL_YCRCB_SGIX 0x8318
+#define GL_YCRCBA_SGIX 0x8319
+#endif
+
+#ifndef GL_SGI_depth_pass_instrument
+#define GL_DEPTH_PASS_INSTRUMENT_SGIX 0x8310
+#define GL_DEPTH_PASS_INSTRUMENT_COUNTERS_SGIX 0x8311
+#define GL_DEPTH_PASS_INSTRUMENT_MAX_SGIX 0x8312
+#endif
+
+#ifndef GL_3DFX_texture_compression_FXT1
+#define GL_COMPRESSED_RGB_FXT1_3DFX 0x86B0
+#define GL_COMPRESSED_RGBA_FXT1_3DFX 0x86B1
+#endif
+
+#ifndef GL_3DFX_multisample
+#define GL_MULTISAMPLE_3DFX 0x86B2
+#define GL_SAMPLE_BUFFERS_3DFX 0x86B3
+#define GL_SAMPLES_3DFX 0x86B4
+#define GL_MULTISAMPLE_BIT_3DFX 0x20000000
+#endif
+
+#ifndef GL_3DFX_tbuffer
+#endif
+
+#ifndef GL_EXT_multisample
+#define GL_MULTISAMPLE_EXT 0x809D
+#define GL_SAMPLE_ALPHA_TO_MASK_EXT 0x809E
+#define GL_SAMPLE_ALPHA_TO_ONE_EXT 0x809F
+#define GL_SAMPLE_MASK_EXT 0x80A0
+#define GL_1PASS_EXT 0x80A1
+#define GL_2PASS_0_EXT 0x80A2
+#define GL_2PASS_1_EXT 0x80A3
+#define GL_4PASS_0_EXT 0x80A4
+#define GL_4PASS_1_EXT 0x80A5
+#define GL_4PASS_2_EXT 0x80A6
+#define GL_4PASS_3_EXT 0x80A7
+#define GL_SAMPLE_BUFFERS_EXT 0x80A8
+#define GL_SAMPLES_EXT 0x80A9
+#define GL_SAMPLE_MASK_VALUE_EXT 0x80AA
+#define GL_SAMPLE_MASK_INVERT_EXT 0x80AB
+#define GL_SAMPLE_PATTERN_EXT 0x80AC
+#define GL_MULTISAMPLE_BIT_EXT 0x20000000
+#endif
+
+#ifndef GL_SGIX_vertex_preclip
+#define GL_VERTEX_PRECLIP_SGIX 0x83EE
+#define GL_VERTEX_PRECLIP_HINT_SGIX 0x83EF
+#endif
+
+#ifndef GL_SGIX_convolution_accuracy
+#define GL_CONVOLUTION_HINT_SGIX 0x8316
+#endif
+
+#ifndef GL_SGIX_resample
+#define GL_PACK_RESAMPLE_SGIX 0x842C
+#define GL_UNPACK_RESAMPLE_SGIX 0x842D
+#define GL_RESAMPLE_REPLICATE_SGIX 0x842E
+#define GL_RESAMPLE_ZERO_FILL_SGIX 0x842F
+#define GL_RESAMPLE_DECIMATE_SGIX 0x8430
+#endif
+
+#ifndef GL_SGIS_point_line_texgen
+#define GL_EYE_DISTANCE_TO_POINT_SGIS 0x81F0
+#define GL_DISTANCE_TO_POINT_SGIS 0x81F1
+#define GL_EYE_DISTANCE_TO_LINE_SGIS 0x81F2
+#define GL_DISTANCE_TO_LINE_SGIS 0x81F3
+#define GL_EYE_POINT_SGIS 0x81F4
+#define GL_POINT_SGIS 0x81F5
+#define GL_EYE_LINE_SGIS 0x81F6
+#define GL_LINE_SGIS 0x81F7
+#endif
+
+#ifndef GL_SGIS_texture_color_mask
+#define GL_TEXTURE_COLOR_WRITEMASK_SGIS 0x81EF
+#endif
+
+#ifndef GL_EXT_texture_env_dot3
+#define GL_DOT3_RGB_EXT 0x8740
+#define GL_DOT3_RGBA_EXT 0x8741
+#endif
+
+#ifndef GL_ATI_texture_mirror_once
+#define GL_MIRROR_CLAMP_ATI 0x8742
+#define GL_MIRROR_CLAMP_TO_EDGE_ATI 0x8743
+#endif
+
+#ifndef GL_NV_fence
+#define GL_ALL_COMPLETED_NV 0x84F2
+#define GL_FENCE_STATUS_NV 0x84F3
+#define GL_FENCE_CONDITION_NV 0x84F4
+#endif
+
+#ifndef GL_IBM_texture_mirrored_repeat
+#define GL_MIRRORED_REPEAT_IBM 0x8370
+#endif
+
+#ifndef GL_NV_evaluators
+#define GL_EVAL_2D_NV 0x86C0
+#define GL_EVAL_TRIANGULAR_2D_NV 0x86C1
+#define GL_MAP_TESSELLATION_NV 0x86C2
+#define GL_MAP_ATTRIB_U_ORDER_NV 0x86C3
+#define GL_MAP_ATTRIB_V_ORDER_NV 0x86C4
+#define GL_EVAL_FRACTIONAL_TESSELLATION_NV 0x86C5
+#define GL_EVAL_VERTEX_ATTRIB0_NV 0x86C6
+#define GL_EVAL_VERTEX_ATTRIB1_NV 0x86C7
+#define GL_EVAL_VERTEX_ATTRIB2_NV 0x86C8
+#define GL_EVAL_VERTEX_ATTRIB3_NV 0x86C9
+#define GL_EVAL_VERTEX_ATTRIB4_NV 0x86CA
+#define GL_EVAL_VERTEX_ATTRIB5_NV 0x86CB
+#define GL_EVAL_VERTEX_ATTRIB6_NV 0x86CC
+#define GL_EVAL_VERTEX_ATTRIB7_NV 0x86CD
+#define GL_EVAL_VERTEX_ATTRIB8_NV 0x86CE
+#define GL_EVAL_VERTEX_ATTRIB9_NV 0x86CF
+#define GL_EVAL_VERTEX_ATTRIB10_NV 0x86D0
+#define GL_EVAL_VERTEX_ATTRIB11_NV 0x86D1
+#define GL_EVAL_VERTEX_ATTRIB12_NV 0x86D2
+#define GL_EVAL_VERTEX_ATTRIB13_NV 0x86D3
+#define GL_EVAL_VERTEX_ATTRIB14_NV 0x86D4
+#define GL_EVAL_VERTEX_ATTRIB15_NV 0x86D5
+#define GL_MAX_MAP_TESSELLATION_NV 0x86D6
+#define GL_MAX_RATIONAL_EVAL_ORDER_NV 0x86D7
+#endif
+
+#ifndef GL_NV_packed_depth_stencil
+#define GL_DEPTH_STENCIL_NV 0x84F9
+#define GL_UNSIGNED_INT_24_8_NV 0x84FA
+#endif
+
+#ifndef GL_NV_register_combiners2
+#define GL_PER_STAGE_CONSTANTS_NV 0x8535
+#endif
+
+#ifndef GL_NV_texture_compression_vtc
+#endif
+
+#ifndef GL_NV_texture_rectangle
+#define GL_TEXTURE_RECTANGLE_NV 0x84F5
+#define GL_TEXTURE_BINDING_RECTANGLE_NV 0x84F6
+#define GL_PROXY_TEXTURE_RECTANGLE_NV 0x84F7
+#define GL_MAX_RECTANGLE_TEXTURE_SIZE_NV 0x84F8
+#endif
+
+#ifndef GL_NV_texture_shader
+#define GL_OFFSET_TEXTURE_RECTANGLE_NV 0x864C
+#define GL_OFFSET_TEXTURE_RECTANGLE_SCALE_NV 0x864D
+#define GL_DOT_PRODUCT_TEXTURE_RECTANGLE_NV 0x864E
+#define GL_RGBA_UNSIGNED_DOT_PRODUCT_MAPPING_NV 0x86D9
+#define GL_UNSIGNED_INT_S8_S8_8_8_NV 0x86DA
+#define GL_UNSIGNED_INT_8_8_S8_S8_REV_NV 0x86DB
+#define GL_DSDT_MAG_INTENSITY_NV 0x86DC
+#define GL_SHADER_CONSISTENT_NV 0x86DD
+#define GL_TEXTURE_SHADER_NV 0x86DE
+#define GL_SHADER_OPERATION_NV 0x86DF
+#define GL_CULL_MODES_NV 0x86E0
+#define GL_OFFSET_TEXTURE_MATRIX_NV 0x86E1
+#define GL_OFFSET_TEXTURE_SCALE_NV 0x86E2
+#define GL_OFFSET_TEXTURE_BIAS_NV 0x86E3
+#define GL_OFFSET_TEXTURE_2D_MATRIX_NV GL_OFFSET_TEXTURE_MATRIX_NV
+#define GL_OFFSET_TEXTURE_2D_SCALE_NV GL_OFFSET_TEXTURE_SCALE_NV
+#define GL_OFFSET_TEXTURE_2D_BIAS_NV GL_OFFSET_TEXTURE_BIAS_NV
+#define GL_PREVIOUS_TEXTURE_INPUT_NV 0x86E4
+#define GL_CONST_EYE_NV 0x86E5
+#define GL_PASS_THROUGH_NV 0x86E6
+#define GL_CULL_FRAGMENT_NV 0x86E7
+#define GL_OFFSET_TEXTURE_2D_NV 0x86E8
+#define GL_DEPENDENT_AR_TEXTURE_2D_NV 0x86E9
+#define GL_DEPENDENT_GB_TEXTURE_2D_NV 0x86EA
+#define GL_DOT_PRODUCT_NV 0x86EC
+#define GL_DOT_PRODUCT_DEPTH_REPLACE_NV 0x86ED
+#define GL_DOT_PRODUCT_TEXTURE_2D_NV 0x86EE
+#define GL_DOT_PRODUCT_TEXTURE_CUBE_MAP_NV 0x86F0
+#define GL_DOT_PRODUCT_DIFFUSE_CUBE_MAP_NV 0x86F1
+#define GL_DOT_PRODUCT_REFLECT_CUBE_MAP_NV 0x86F2
+#define GL_DOT_PRODUCT_CONST_EYE_REFLECT_CUBE_MAP_NV 0x86F3
+#define GL_HILO_NV 0x86F4
+#define GL_DSDT_NV 0x86F5
+#define GL_DSDT_MAG_NV 0x86F6
+#define GL_DSDT_MAG_VIB_NV 0x86F7
+#define GL_HILO16_NV 0x86F8
+#define GL_SIGNED_HILO_NV 0x86F9
+#define GL_SIGNED_HILO16_NV 0x86FA
+#define GL_SIGNED_RGBA_NV 0x86FB
+#define GL_SIGNED_RGBA8_NV 0x86FC
+#define GL_SIGNED_RGB_NV 0x86FE
+#define GL_SIGNED_RGB8_NV 0x86FF
+#define GL_SIGNED_LUMINANCE_NV 0x8701
+#define GL_SIGNED_LUMINANCE8_NV 0x8702
+#define GL_SIGNED_LUMINANCE_ALPHA_NV 0x8703
+#define GL_SIGNED_LUMINANCE8_ALPHA8_NV 0x8704
+#define GL_SIGNED_ALPHA_NV 0x8705
+#define GL_SIGNED_ALPHA8_NV 0x8706
+#define GL_SIGNED_INTENSITY_NV 0x8707
+#define GL_SIGNED_INTENSITY8_NV 0x8708
+#define GL_DSDT8_NV 0x8709
+#define GL_DSDT8_MAG8_NV 0x870A
+#define GL_DSDT8_MAG8_INTENSITY8_NV 0x870B
+#define GL_SIGNED_RGB_UNSIGNED_ALPHA_NV 0x870C
+#define GL_SIGNED_RGB8_UNSIGNED_ALPHA8_NV 0x870D
+#define GL_HI_SCALE_NV 0x870E
+#define GL_LO_SCALE_NV 0x870F
+#define GL_DS_SCALE_NV 0x8710
+#define GL_DT_SCALE_NV 0x8711
+#define GL_MAGNITUDE_SCALE_NV 0x8712
+#define GL_VIBRANCE_SCALE_NV 0x8713
+#define GL_HI_BIAS_NV 0x8714
+#define GL_LO_BIAS_NV 0x8715
+#define GL_DS_BIAS_NV 0x8716
+#define GL_DT_BIAS_NV 0x8717
+#define GL_MAGNITUDE_BIAS_NV 0x8718
+#define GL_VIBRANCE_BIAS_NV 0x8719
+#define GL_TEXTURE_BORDER_VALUES_NV 0x871A
+#define GL_TEXTURE_HI_SIZE_NV 0x871B
+#define GL_TEXTURE_LO_SIZE_NV 0x871C
+#define GL_TEXTURE_DS_SIZE_NV 0x871D
+#define GL_TEXTURE_DT_SIZE_NV 0x871E
+#define GL_TEXTURE_MAG_SIZE_NV 0x871F
+#endif
+
+#ifndef GL_NV_texture_shader2
+#define GL_DOT_PRODUCT_TEXTURE_3D_NV 0x86EF
+#endif
+
+#ifndef GL_NV_vertex_array_range2
+#define GL_VERTEX_ARRAY_RANGE_WITHOUT_FLUSH_NV 0x8533
+#endif
+
+#ifndef GL_NV_vertex_program
+#define GL_VERTEX_PROGRAM_NV 0x8620
+#define GL_VERTEX_STATE_PROGRAM_NV 0x8621
+#define GL_ATTRIB_ARRAY_SIZE_NV 0x8623
+#define GL_ATTRIB_ARRAY_STRIDE_NV 0x8624
+#define GL_ATTRIB_ARRAY_TYPE_NV 0x8625
+#define GL_CURRENT_ATTRIB_NV 0x8626
+#define GL_PROGRAM_LENGTH_NV 0x8627
+#define GL_PROGRAM_STRING_NV 0x8628
+#define GL_MODELVIEW_PROJECTION_NV 0x8629
+#define GL_IDENTITY_NV 0x862A
+#define GL_INVERSE_NV 0x862B
+#define GL_TRANSPOSE_NV 0x862C
+#define GL_INVERSE_TRANSPOSE_NV 0x862D
+#define GL_MAX_TRACK_MATRIX_STACK_DEPTH_NV 0x862E
+#define GL_MAX_TRACK_MATRICES_NV 0x862F
+#define GL_MATRIX0_NV 0x8630
+#define GL_MATRIX1_NV 0x8631
+#define GL_MATRIX2_NV 0x8632
+#define GL_MATRIX3_NV 0x8633
+#define GL_MATRIX4_NV 0x8634
+#define GL_MATRIX5_NV 0x8635
+#define GL_MATRIX6_NV 0x8636
+#define GL_MATRIX7_NV 0x8637
+#define GL_CURRENT_MATRIX_STACK_DEPTH_NV 0x8640
+#define GL_CURRENT_MATRIX_NV 0x8641
+#define GL_VERTEX_PROGRAM_POINT_SIZE_NV 0x8642
+#define GL_VERTEX_PROGRAM_TWO_SIDE_NV 0x8643
+#define GL_PROGRAM_PARAMETER_NV 0x8644
+#define GL_ATTRIB_ARRAY_POINTER_NV 0x8645
+#define GL_PROGRAM_TARGET_NV 0x8646
+#define GL_PROGRAM_RESIDENT_NV 0x8647
+#define GL_TRACK_MATRIX_NV 0x8648
+#define GL_TRACK_MATRIX_TRANSFORM_NV 0x8649
+#define GL_VERTEX_PROGRAM_BINDING_NV 0x864A
+#define GL_PROGRAM_ERROR_POSITION_NV 0x864B
+#define GL_VERTEX_ATTRIB_ARRAY0_NV 0x8650
+#define GL_VERTEX_ATTRIB_ARRAY1_NV 0x8651
+#define GL_VERTEX_ATTRIB_ARRAY2_NV 0x8652
+#define GL_VERTEX_ATTRIB_ARRAY3_NV 0x8653
+#define GL_VERTEX_ATTRIB_ARRAY4_NV 0x8654
+#define GL_VERTEX_ATTRIB_ARRAY5_NV 0x8655
+#define GL_VERTEX_ATTRIB_ARRAY6_NV 0x8656
+#define GL_VERTEX_ATTRIB_ARRAY7_NV 0x8657
+#define GL_VERTEX_ATTRIB_ARRAY8_NV 0x8658
+#define GL_VERTEX_ATTRIB_ARRAY9_NV 0x8659
+#define GL_VERTEX_ATTRIB_ARRAY10_NV 0x865A
+#define GL_VERTEX_ATTRIB_ARRAY11_NV 0x865B
+#define GL_VERTEX_ATTRIB_ARRAY12_NV 0x865C
+#define GL_VERTEX_ATTRIB_ARRAY13_NV 0x865D
+#define GL_VERTEX_ATTRIB_ARRAY14_NV 0x865E
+#define GL_VERTEX_ATTRIB_ARRAY15_NV 0x865F
+#define GL_MAP1_VERTEX_ATTRIB0_4_NV 0x8660
+#define GL_MAP1_VERTEX_ATTRIB1_4_NV 0x8661
+#define GL_MAP1_VERTEX_ATTRIB2_4_NV 0x8662
+#define GL_MAP1_VERTEX_ATTRIB3_4_NV 0x8663
+#define GL_MAP1_VERTEX_ATTRIB4_4_NV 0x8664
+#define GL_MAP1_VERTEX_ATTRIB5_4_NV 0x8665
+#define GL_MAP1_VERTEX_ATTRIB6_4_NV 0x8666
+#define GL_MAP1_VERTEX_ATTRIB7_4_NV 0x8667
+#define GL_MAP1_VERTEX_ATTRIB8_4_NV 0x8668
+#define GL_MAP1_VERTEX_ATTRIB9_4_NV 0x8669
+#define GL_MAP1_VERTEX_ATTRIB10_4_NV 0x866A
+#define GL_MAP1_VERTEX_ATTRIB11_4_NV 0x866B
+#define GL_MAP1_VERTEX_ATTRIB12_4_NV 0x866C
+#define GL_MAP1_VERTEX_ATTRIB13_4_NV 0x866D
+#define GL_MAP1_VERTEX_ATTRIB14_4_NV 0x866E
+#define GL_MAP1_VERTEX_ATTRIB15_4_NV 0x866F
+#define GL_MAP2_VERTEX_ATTRIB0_4_NV 0x8670
+#define GL_MAP2_VERTEX_ATTRIB1_4_NV 0x8671
+#define GL_MAP2_VERTEX_ATTRIB2_4_NV 0x8672
+#define GL_MAP2_VERTEX_ATTRIB3_4_NV 0x8673
+#define GL_MAP2_VERTEX_ATTRIB4_4_NV 0x8674
+#define GL_MAP2_VERTEX_ATTRIB5_4_NV 0x8675
+#define GL_MAP2_VERTEX_ATTRIB6_4_NV 0x8676
+#define GL_MAP2_VERTEX_ATTRIB7_4_NV 0x8677
+#define GL_MAP2_VERTEX_ATTRIB8_4_NV 0x8678
+#define GL_MAP2_VERTEX_ATTRIB9_4_NV 0x8679
+#define GL_MAP2_VERTEX_ATTRIB10_4_NV 0x867A
+#define GL_MAP2_VERTEX_ATTRIB11_4_NV 0x867B
+#define GL_MAP2_VERTEX_ATTRIB12_4_NV 0x867C
+#define GL_MAP2_VERTEX_ATTRIB13_4_NV 0x867D
+#define GL_MAP2_VERTEX_ATTRIB14_4_NV 0x867E
+#define GL_MAP2_VERTEX_ATTRIB15_4_NV 0x867F
+#endif
+
+#ifndef GL_SGIX_texture_coordinate_clamp
+#define GL_TEXTURE_MAX_CLAMP_S_SGIX 0x8369
+#define GL_TEXTURE_MAX_CLAMP_T_SGIX 0x836A
+#define GL_TEXTURE_MAX_CLAMP_R_SGIX 0x836B
+#endif
+
+#ifndef GL_SGIX_scalebias_hint
+#define GL_SCALEBIAS_HINT_SGIX 0x8322
+#endif
+
+#ifndef GL_OML_interlace
+#define GL_INTERLACE_OML 0x8980
+#define GL_INTERLACE_READ_OML 0x8981
+#endif
+
+#ifndef GL_OML_subsample
+#define GL_FORMAT_SUBSAMPLE_24_24_OML 0x8982
+#define GL_FORMAT_SUBSAMPLE_244_244_OML 0x8983
+#endif
+
+#ifndef GL_OML_resample
+#define GL_PACK_RESAMPLE_OML 0x8984
+#define GL_UNPACK_RESAMPLE_OML 0x8985
+#define GL_RESAMPLE_REPLICATE_OML 0x8986
+#define GL_RESAMPLE_ZERO_FILL_OML 0x8987
+#define GL_RESAMPLE_AVERAGE_OML 0x8988
+#define GL_RESAMPLE_DECIMATE_OML 0x8989
+#endif
+
+#ifndef GL_NV_copy_depth_to_color
+#define GL_DEPTH_STENCIL_TO_RGBA_NV 0x886E
+#define GL_DEPTH_STENCIL_TO_BGRA_NV 0x886F
+#endif
+
+#ifndef GL_ATI_envmap_bumpmap
+#define GL_BUMP_ROT_MATRIX_ATI 0x8775
+#define GL_BUMP_ROT_MATRIX_SIZE_ATI 0x8776
+#define GL_BUMP_NUM_TEX_UNITS_ATI 0x8777
+#define GL_BUMP_TEX_UNITS_ATI 0x8778
+#define GL_DUDV_ATI 0x8779
+#define GL_DU8DV8_ATI 0x877A
+#define GL_BUMP_ENVMAP_ATI 0x877B
+#define GL_BUMP_TARGET_ATI 0x877C
+#endif
+
+#ifndef GL_ATI_fragment_shader
+#define GL_FRAGMENT_SHADER_ATI 0x8920
+#define GL_REG_0_ATI 0x8921
+#define GL_REG_1_ATI 0x8922
+#define GL_REG_2_ATI 0x8923
+#define GL_REG_3_ATI 0x8924
+#define GL_REG_4_ATI 0x8925
+#define GL_REG_5_ATI 0x8926
+#define GL_REG_6_ATI 0x8927
+#define GL_REG_7_ATI 0x8928
+#define GL_REG_8_ATI 0x8929
+#define GL_REG_9_ATI 0x892A
+#define GL_REG_10_ATI 0x892B
+#define GL_REG_11_ATI 0x892C
+#define GL_REG_12_ATI 0x892D
+#define GL_REG_13_ATI 0x892E
+#define GL_REG_14_ATI 0x892F
+#define GL_REG_15_ATI 0x8930
+#define GL_REG_16_ATI 0x8931
+#define GL_REG_17_ATI 0x8932
+#define GL_REG_18_ATI 0x8933
+#define GL_REG_19_ATI 0x8934
+#define GL_REG_20_ATI 0x8935
+#define GL_REG_21_ATI 0x8936
+#define GL_REG_22_ATI 0x8937
+#define GL_REG_23_ATI 0x8938
+#define GL_REG_24_ATI 0x8939
+#define GL_REG_25_ATI 0x893A
+#define GL_REG_26_ATI 0x893B
+#define GL_REG_27_ATI 0x893C
+#define GL_REG_28_ATI 0x893D
+#define GL_REG_29_ATI 0x893E
+#define GL_REG_30_ATI 0x893F
+#define GL_REG_31_ATI 0x8940
+#define GL_CON_0_ATI 0x8941
+#define GL_CON_1_ATI 0x8942
+#define GL_CON_2_ATI 0x8943
+#define GL_CON_3_ATI 0x8944
+#define GL_CON_4_ATI 0x8945
+#define GL_CON_5_ATI 0x8946
+#define GL_CON_6_ATI 0x8947
+#define GL_CON_7_ATI 0x8948
+#define GL_CON_8_ATI 0x8949
+#define GL_CON_9_ATI 0x894A
+#define GL_CON_10_ATI 0x894B
+#define GL_CON_11_ATI 0x894C
+#define GL_CON_12_ATI 0x894D
+#define GL_CON_13_ATI 0x894E
+#define GL_CON_14_ATI 0x894F
+#define GL_CON_15_ATI 0x8950
+#define GL_CON_16_ATI 0x8951
+#define GL_CON_17_ATI 0x8952
+#define GL_CON_18_ATI 0x8953
+#define GL_CON_19_ATI 0x8954
+#define GL_CON_20_ATI 0x8955
+#define GL_CON_21_ATI 0x8956
+#define GL_CON_22_ATI 0x8957
+#define GL_CON_23_ATI 0x8958
+#define GL_CON_24_ATI 0x8959
+#define GL_CON_25_ATI 0x895A
+#define GL_CON_26_ATI 0x895B
+#define GL_CON_27_ATI 0x895C
+#define GL_CON_28_ATI 0x895D
+#define GL_CON_29_ATI 0x895E
+#define GL_CON_30_ATI 0x895F
+#define GL_CON_31_ATI 0x8960
+#define GL_MOV_ATI 0x8961
+#define GL_ADD_ATI 0x8963
+#define GL_MUL_ATI 0x8964
+#define GL_SUB_ATI 0x8965
+#define GL_DOT3_ATI 0x8966
+#define GL_DOT4_ATI 0x8967
+#define GL_MAD_ATI 0x8968
+#define GL_LERP_ATI 0x8969
+#define GL_CND_ATI 0x896A
+#define GL_CND0_ATI 0x896B
+#define GL_DOT2_ADD_ATI 0x896C
+#define GL_SECONDARY_INTERPOLATOR_ATI 0x896D
+#define GL_NUM_FRAGMENT_REGISTERS_ATI 0x896E
+#define GL_NUM_FRAGMENT_CONSTANTS_ATI 0x896F
+#define GL_NUM_PASSES_ATI 0x8970
+#define GL_NUM_INSTRUCTIONS_PER_PASS_ATI 0x8971
+#define GL_NUM_INSTRUCTIONS_TOTAL_ATI 0x8972
+#define GL_NUM_INPUT_INTERPOLATOR_COMPONENTS_ATI 0x8973
+#define GL_NUM_LOOPBACK_COMPONENTS_ATI 0x8974
+#define GL_COLOR_ALPHA_PAIRING_ATI 0x8975
+#define GL_SWIZZLE_STR_ATI 0x8976
+#define GL_SWIZZLE_STQ_ATI 0x8977
+#define GL_SWIZZLE_STR_DR_ATI 0x8978
+#define GL_SWIZZLE_STQ_DQ_ATI 0x8979
+#define GL_SWIZZLE_STRQ_ATI 0x897A
+#define GL_SWIZZLE_STRQ_DQ_ATI 0x897B
+#define GL_RED_BIT_ATI 0x00000001
+#define GL_GREEN_BIT_ATI 0x00000002
+#define GL_BLUE_BIT_ATI 0x00000004
+#define GL_2X_BIT_ATI 0x00000001
+#define GL_4X_BIT_ATI 0x00000002
+#define GL_8X_BIT_ATI 0x00000004
+#define GL_HALF_BIT_ATI 0x00000008
+#define GL_QUARTER_BIT_ATI 0x00000010
+#define GL_EIGHTH_BIT_ATI 0x00000020
+#define GL_SATURATE_BIT_ATI 0x00000040
+#define GL_COMP_BIT_ATI 0x00000002
+#define GL_NEGATE_BIT_ATI 0x00000004
+#define GL_BIAS_BIT_ATI 0x00000008
+#endif
+
+#ifndef GL_ATI_pn_triangles
+#define GL_PN_TRIANGLES_ATI 0x87F0
+#define GL_MAX_PN_TRIANGLES_TESSELATION_LEVEL_ATI 0x87F1
+#define GL_PN_TRIANGLES_POINT_MODE_ATI 0x87F2
+#define GL_PN_TRIANGLES_NORMAL_MODE_ATI 0x87F3
+#define GL_PN_TRIANGLES_TESSELATION_LEVEL_ATI 0x87F4
+#define GL_PN_TRIANGLES_POINT_MODE_LINEAR_ATI 0x87F5
+#define GL_PN_TRIANGLES_POINT_MODE_CUBIC_ATI 0x87F6
+#define GL_PN_TRIANGLES_NORMAL_MODE_LINEAR_ATI 0x87F7
+#define GL_PN_TRIANGLES_NORMAL_MODE_QUADRATIC_ATI 0x87F8
+#endif
+
+#ifndef GL_ATI_vertex_array_object
+#define GL_STATIC_ATI 0x8760
+#define GL_DYNAMIC_ATI 0x8761
+#define GL_PRESERVE_ATI 0x8762
+#define GL_DISCARD_ATI 0x8763
+#define GL_BUFFER_SIZE_ATI 0x8764
+#define GL_BUFFER_USAGE_ATI 0x8765
+#define GL_ARRAY_BUFFER_ATI 0x8766
+#define GL_ARRAY_OFFSET_ATI 0x8767
+#endif
+
+#ifndef GL_EXT_vertex_shader
+#define GL_VERTEX_SHADER_EXT 0x8780
+#define GL_VERTEX_SHADER_BINDING_EXT 0x8781
+#define GL_OP_INDEX_EXT 0x8782
+#define GL_OP_NEGATE_EXT 0x8783
+#define GL_OP_DOT3_EXT 0x8784
+#define GL_OP_DOT4_EXT 0x8785
+#define GL_OP_MUL_EXT 0x8786
+#define GL_OP_ADD_EXT 0x8787
+#define GL_OP_MADD_EXT 0x8788
+#define GL_OP_FRAC_EXT 0x8789
+#define GL_OP_MAX_EXT 0x878A
+#define GL_OP_MIN_EXT 0x878B
+#define GL_OP_SET_GE_EXT 0x878C
+#define GL_OP_SET_LT_EXT 0x878D
+#define GL_OP_CLAMP_EXT 0x878E
+#define GL_OP_FLOOR_EXT 0x878F
+#define GL_OP_ROUND_EXT 0x8790
+#define GL_OP_EXP_BASE_2_EXT 0x8791
+#define GL_OP_LOG_BASE_2_EXT 0x8792
+#define GL_OP_POWER_EXT 0x8793
+#define GL_OP_RECIP_EXT 0x8794
+#define GL_OP_RECIP_SQRT_EXT 0x8795
+#define GL_OP_SUB_EXT 0x8796
+#define GL_OP_CROSS_PRODUCT_EXT 0x8797
+#define GL_OP_MULTIPLY_MATRIX_EXT 0x8798
+#define GL_OP_MOV_EXT 0x8799
+#define GL_OUTPUT_VERTEX_EXT 0x879A
+#define GL_OUTPUT_COLOR0_EXT 0x879B
+#define GL_OUTPUT_COLOR1_EXT 0x879C
+#define GL_OUTPUT_TEXTURE_COORD0_EXT 0x879D
+#define GL_OUTPUT_TEXTURE_COORD1_EXT 0x879E
+#define GL_OUTPUT_TEXTURE_COORD2_EXT 0x879F
+#define GL_OUTPUT_TEXTURE_COORD3_EXT 0x87A0
+#define GL_OUTPUT_TEXTURE_COORD4_EXT 0x87A1
+#define GL_OUTPUT_TEXTURE_COORD5_EXT 0x87A2
+#define GL_OUTPUT_TEXTURE_COORD6_EXT 0x87A3
+#define GL_OUTPUT_TEXTURE_COORD7_EXT 0x87A4
+#define GL_OUTPUT_TEXTURE_COORD8_EXT 0x87A5
+#define GL_OUTPUT_TEXTURE_COORD9_EXT 0x87A6
+#define GL_OUTPUT_TEXTURE_COORD10_EXT 0x87A7
+#define GL_OUTPUT_TEXTURE_COORD11_EXT 0x87A8
+#define GL_OUTPUT_TEXTURE_COORD12_EXT 0x87A9
+#define GL_OUTPUT_TEXTURE_COORD13_EXT 0x87AA
+#define GL_OUTPUT_TEXTURE_COORD14_EXT 0x87AB
+#define GL_OUTPUT_TEXTURE_COORD15_EXT 0x87AC
+#define GL_OUTPUT_TEXTURE_COORD16_EXT 0x87AD
+#define GL_OUTPUT_TEXTURE_COORD17_EXT 0x87AE
+#define GL_OUTPUT_TEXTURE_COORD18_EXT 0x87AF
+#define GL_OUTPUT_TEXTURE_COORD19_EXT 0x87B0
+#define GL_OUTPUT_TEXTURE_COORD20_EXT 0x87B1
+#define GL_OUTPUT_TEXTURE_COORD21_EXT 0x87B2
+#define GL_OUTPUT_TEXTURE_COORD22_EXT 0x87B3
+#define GL_OUTPUT_TEXTURE_COORD23_EXT 0x87B4
+#define GL_OUTPUT_TEXTURE_COORD24_EXT 0x87B5
+#define GL_OUTPUT_TEXTURE_COORD25_EXT 0x87B6
+#define GL_OUTPUT_TEXTURE_COORD26_EXT 0x87B7
+#define GL_OUTPUT_TEXTURE_COORD27_EXT 0x87B8
+#define GL_OUTPUT_TEXTURE_COORD28_EXT 0x87B9
+#define GL_OUTPUT_TEXTURE_COORD29_EXT 0x87BA
+#define GL_OUTPUT_TEXTURE_COORD30_EXT 0x87BB
+#define GL_OUTPUT_TEXTURE_COORD31_EXT 0x87BC
+#define GL_OUTPUT_FOG_EXT 0x87BD
+#define GL_SCALAR_EXT 0x87BE
+#define GL_VECTOR_EXT 0x87BF
+#define GL_MATRIX_EXT 0x87C0
+#define GL_VARIANT_EXT 0x87C1
+#define GL_INVARIANT_EXT 0x87C2
+#define GL_LOCAL_CONSTANT_EXT 0x87C3
+#define GL_LOCAL_EXT 0x87C4
+#define GL_MAX_VERTEX_SHADER_INSTRUCTIONS_EXT 0x87C5
+#define GL_MAX_VERTEX_SHADER_VARIANTS_EXT 0x87C6
+#define GL_MAX_VERTEX_SHADER_INVARIANTS_EXT 0x87C7
+#define GL_MAX_VERTEX_SHADER_LOCAL_CONSTANTS_EXT 0x87C8
+#define GL_MAX_VERTEX_SHADER_LOCALS_EXT 0x87C9
+#define GL_MAX_OPTIMIZED_VERTEX_SHADER_INSTRUCTIONS_EXT 0x87CA
+#define GL_MAX_OPTIMIZED_VERTEX_SHADER_VARIANTS_EXT 0x87CB
+#define GL_MAX_OPTIMIZED_VERTEX_SHADER_LOCAL_CONSTANTS_EXT 0x87CC
+#define GL_MAX_OPTIMIZED_VERTEX_SHADER_INVARIANTS_EXT 0x87CD
+#define GL_MAX_OPTIMIZED_VERTEX_SHADER_LOCALS_EXT 0x87CE
+#define GL_VERTEX_SHADER_INSTRUCTIONS_EXT 0x87CF
+#define GL_VERTEX_SHADER_VARIANTS_EXT 0x87D0
+#define GL_VERTEX_SHADER_INVARIANTS_EXT 0x87D1
+#define GL_VERTEX_SHADER_LOCAL_CONSTANTS_EXT 0x87D2
+#define GL_VERTEX_SHADER_LOCALS_EXT 0x87D3
+#define GL_VERTEX_SHADER_OPTIMIZED_EXT 0x87D4
+#define GL_X_EXT 0x87D5
+#define GL_Y_EXT 0x87D6
+#define GL_Z_EXT 0x87D7
+#define GL_W_EXT 0x87D8
+#define GL_NEGATIVE_X_EXT 0x87D9
+#define GL_NEGATIVE_Y_EXT 0x87DA
+#define GL_NEGATIVE_Z_EXT 0x87DB
+#define GL_NEGATIVE_W_EXT 0x87DC
+#define GL_ZERO_EXT 0x87DD
+#define GL_ONE_EXT 0x87DE
+#define GL_NEGATIVE_ONE_EXT 0x87DF
+#define GL_NORMALIZED_RANGE_EXT 0x87E0
+#define GL_FULL_RANGE_EXT 0x87E1
+#define GL_CURRENT_VERTEX_EXT 0x87E2
+#define GL_MVP_MATRIX_EXT 0x87E3
+#define GL_VARIANT_VALUE_EXT 0x87E4
+#define GL_VARIANT_DATATYPE_EXT 0x87E5
+#define GL_VARIANT_ARRAY_STRIDE_EXT 0x87E6
+#define GL_VARIANT_ARRAY_TYPE_EXT 0x87E7
+#define GL_VARIANT_ARRAY_EXT 0x87E8
+#define GL_VARIANT_ARRAY_POINTER_EXT 0x87E9
+#define GL_INVARIANT_VALUE_EXT 0x87EA
+#define GL_INVARIANT_DATATYPE_EXT 0x87EB
+#define GL_LOCAL_CONSTANT_VALUE_EXT 0x87EC
+#define GL_LOCAL_CONSTANT_DATATYPE_EXT 0x87ED
+#endif
+
+#ifndef GL_ATI_vertex_streams
+#define GL_MAX_VERTEX_STREAMS_ATI 0x876B
+#define GL_VERTEX_STREAM0_ATI 0x876C
+#define GL_VERTEX_STREAM1_ATI 0x876D
+#define GL_VERTEX_STREAM2_ATI 0x876E
+#define GL_VERTEX_STREAM3_ATI 0x876F
+#define GL_VERTEX_STREAM4_ATI 0x8770
+#define GL_VERTEX_STREAM5_ATI 0x8771
+#define GL_VERTEX_STREAM6_ATI 0x8772
+#define GL_VERTEX_STREAM7_ATI 0x8773
+#define GL_VERTEX_SOURCE_ATI 0x8774
+#endif
+
+#ifndef GL_ATI_element_array
+#define GL_ELEMENT_ARRAY_ATI 0x8768
+#define GL_ELEMENT_ARRAY_TYPE_ATI 0x8769
+#define GL_ELEMENT_ARRAY_POINTER_ATI 0x876A
+#endif
+
+#ifndef GL_SUN_mesh_array
+#define GL_QUAD_MESH_SUN 0x8614
+#define GL_TRIANGLE_MESH_SUN 0x8615
+#endif
+
+#ifndef GL_SUN_slice_accum
+#define GL_SLICE_ACCUM_SUN 0x85CC
+#endif
+
+#ifndef GL_NV_multisample_filter_hint
+#define GL_MULTISAMPLE_FILTER_HINT_NV 0x8534
+#endif
+
+#ifndef GL_NV_depth_clamp
+#define GL_DEPTH_CLAMP_NV 0x864F
+#endif
+
+#ifndef GL_NV_occlusion_query
+#define GL_PIXEL_COUNTER_BITS_NV 0x8864
+#define GL_CURRENT_OCCLUSION_QUERY_ID_NV 0x8865
+#define GL_PIXEL_COUNT_NV 0x8866
+#define GL_PIXEL_COUNT_AVAILABLE_NV 0x8867
+#endif
+
+#ifndef GL_NV_point_sprite
+#define GL_POINT_SPRITE_NV 0x8861
+#define GL_COORD_REPLACE_NV 0x8862
+#define GL_POINT_SPRITE_R_MODE_NV 0x8863
+#endif
+
+#ifndef GL_NV_texture_shader3
+#define GL_OFFSET_PROJECTIVE_TEXTURE_2D_NV 0x8850
+#define GL_OFFSET_PROJECTIVE_TEXTURE_2D_SCALE_NV 0x8851
+#define GL_OFFSET_PROJECTIVE_TEXTURE_RECTANGLE_NV 0x8852
+#define GL_OFFSET_PROJECTIVE_TEXTURE_RECTANGLE_SCALE_NV 0x8853
+#define GL_OFFSET_HILO_TEXTURE_2D_NV 0x8854
+#define GL_OFFSET_HILO_TEXTURE_RECTANGLE_NV 0x8855
+#define GL_OFFSET_HILO_PROJECTIVE_TEXTURE_2D_NV 0x8856
+#define GL_OFFSET_HILO_PROJECTIVE_TEXTURE_RECTANGLE_NV 0x8857
+#define GL_DEPENDENT_HILO_TEXTURE_2D_NV 0x8858
+#define GL_DEPENDENT_RGB_TEXTURE_3D_NV 0x8859
+#define GL_DEPENDENT_RGB_TEXTURE_CUBE_MAP_NV 0x885A
+#define GL_DOT_PRODUCT_PASS_THROUGH_NV 0x885B
+#define GL_DOT_PRODUCT_TEXTURE_1D_NV 0x885C
+#define GL_DOT_PRODUCT_AFFINE_DEPTH_REPLACE_NV 0x885D
+#define GL_HILO8_NV 0x885E
+#define GL_SIGNED_HILO8_NV 0x885F
+#define GL_FORCE_BLUE_TO_ONE_NV 0x8860
+#endif
+
+#ifndef GL_NV_vertex_program1_1
+#endif
+
+#ifndef GL_EXT_shadow_funcs
+#endif
+
+#ifndef GL_EXT_stencil_two_side
+#define GL_STENCIL_TEST_TWO_SIDE_EXT 0x8910
+#define GL_ACTIVE_STENCIL_FACE_EXT 0x8911
+#endif
+
+#ifndef GL_ATI_text_fragment_shader
+#define GL_TEXT_FRAGMENT_SHADER_ATI 0x8200
+#endif
+
+#ifndef GL_APPLE_client_storage
+#define GL_UNPACK_CLIENT_STORAGE_APPLE 0x85B2
+#endif
+
+#ifndef GL_APPLE_element_array
+#define GL_ELEMENT_ARRAY_APPLE 0x8768
+#define GL_ELEMENT_ARRAY_TYPE_APPLE 0x8769
+#define GL_ELEMENT_ARRAY_POINTER_APPLE 0x876A
+#endif
+
+#ifndef GL_APPLE_fence
+#define GL_DRAW_PIXELS_APPLE 0x8A0A
+#define GL_FENCE_APPLE 0x8A0B
+#endif
+
+#ifndef GL_APPLE_vertex_array_object
+#define GL_VERTEX_ARRAY_BINDING_APPLE 0x85B5
+#endif
+
+#ifndef GL_APPLE_vertex_array_range
+#define GL_VERTEX_ARRAY_RANGE_APPLE 0x851D
+#define GL_VERTEX_ARRAY_RANGE_LENGTH_APPLE 0x851E
+#define GL_VERTEX_ARRAY_STORAGE_HINT_APPLE 0x851F
+#define GL_VERTEX_ARRAY_RANGE_POINTER_APPLE 0x8521
+#define GL_STORAGE_CACHED_APPLE 0x85BE
+#define GL_STORAGE_SHARED_APPLE 0x85BF
+#endif
+
+#ifndef GL_APPLE_ycbcr_422
+#define GL_YCBCR_422_APPLE 0x85B9
+#define GL_UNSIGNED_SHORT_8_8_APPLE 0x85BA
+#define GL_UNSIGNED_SHORT_8_8_REV_APPLE 0x85BB
+#endif
+
+#ifndef GL_S3_s3tc
+#define GL_RGB_S3TC 0x83A0
+#define GL_RGB4_S3TC 0x83A1
+#define GL_RGBA_S3TC 0x83A2
+#define GL_RGBA4_S3TC 0x83A3
+#endif
+
+#ifndef GL_ATI_draw_buffers
+#define GL_MAX_DRAW_BUFFERS_ATI 0x8824
+#define GL_DRAW_BUFFER0_ATI 0x8825
+#define GL_DRAW_BUFFER1_ATI 0x8826
+#define GL_DRAW_BUFFER2_ATI 0x8827
+#define GL_DRAW_BUFFER3_ATI 0x8828
+#define GL_DRAW_BUFFER4_ATI 0x8829
+#define GL_DRAW_BUFFER5_ATI 0x882A
+#define GL_DRAW_BUFFER6_ATI 0x882B
+#define GL_DRAW_BUFFER7_ATI 0x882C
+#define GL_DRAW_BUFFER8_ATI 0x882D
+#define GL_DRAW_BUFFER9_ATI 0x882E
+#define GL_DRAW_BUFFER10_ATI 0x882F
+#define GL_DRAW_BUFFER11_ATI 0x8830
+#define GL_DRAW_BUFFER12_ATI 0x8831
+#define GL_DRAW_BUFFER13_ATI 0x8832
+#define GL_DRAW_BUFFER14_ATI 0x8833
+#define GL_DRAW_BUFFER15_ATI 0x8834
+#endif
+
+#ifndef GL_ATI_pixel_format_float
+#define GL_TYPE_RGBA_FLOAT_ATI 0x8820
+#define GL_COLOR_CLEAR_UNCLAMPED_VALUE_ATI 0x8835
+#endif
+
+#ifndef GL_ATI_texture_env_combine3
+#define GL_MODULATE_ADD_ATI 0x8744
+#define GL_MODULATE_SIGNED_ADD_ATI 0x8745
+#define GL_MODULATE_SUBTRACT_ATI 0x8746
+#endif
+
+#ifndef GL_ATI_texture_float
+#define GL_RGBA_FLOAT32_ATI 0x8814
+#define GL_RGB_FLOAT32_ATI 0x8815
+#define GL_ALPHA_FLOAT32_ATI 0x8816
+#define GL_INTENSITY_FLOAT32_ATI 0x8817
+#define GL_LUMINANCE_FLOAT32_ATI 0x8818
+#define GL_LUMINANCE_ALPHA_FLOAT32_ATI 0x8819
+#define GL_RGBA_FLOAT16_ATI 0x881A
+#define GL_RGB_FLOAT16_ATI 0x881B
+#define GL_ALPHA_FLOAT16_ATI 0x881C
+#define GL_INTENSITY_FLOAT16_ATI 0x881D
+#define GL_LUMINANCE_FLOAT16_ATI 0x881E
+#define GL_LUMINANCE_ALPHA_FLOAT16_ATI 0x881F
+#endif
+
+#ifndef GL_NV_float_buffer
+#define GL_FLOAT_R_NV 0x8880
+#define GL_FLOAT_RG_NV 0x8881
+#define GL_FLOAT_RGB_NV 0x8882
+#define GL_FLOAT_RGBA_NV 0x8883
+#define GL_FLOAT_R16_NV 0x8884
+#define GL_FLOAT_R32_NV 0x8885
+#define GL_FLOAT_RG16_NV 0x8886
+#define GL_FLOAT_RG32_NV 0x8887
+#define GL_FLOAT_RGB16_NV 0x8888
+#define GL_FLOAT_RGB32_NV 0x8889
+#define GL_FLOAT_RGBA16_NV 0x888A
+#define GL_FLOAT_RGBA32_NV 0x888B
+#define GL_TEXTURE_FLOAT_COMPONENTS_NV 0x888C
+#define GL_FLOAT_CLEAR_COLOR_VALUE_NV 0x888D
+#define GL_FLOAT_RGBA_MODE_NV 0x888E
+#endif
+
+#ifndef GL_NV_fragment_program
+#define GL_MAX_FRAGMENT_PROGRAM_LOCAL_PARAMETERS_NV 0x8868
+#define GL_FRAGMENT_PROGRAM_NV 0x8870
+#define GL_MAX_TEXTURE_COORDS_NV 0x8871
+#define GL_MAX_TEXTURE_IMAGE_UNITS_NV 0x8872
+#define GL_FRAGMENT_PROGRAM_BINDING_NV 0x8873
+#define GL_PROGRAM_ERROR_STRING_NV 0x8874
+#endif
+
+#ifndef GL_NV_half_float
+#define GL_HALF_FLOAT_NV 0x140B
+#endif
+
+#ifndef GL_NV_pixel_data_range
+#define GL_WRITE_PIXEL_DATA_RANGE_NV 0x8878
+#define GL_READ_PIXEL_DATA_RANGE_NV 0x8879
+#define GL_WRITE_PIXEL_DATA_RANGE_LENGTH_NV 0x887A
+#define GL_READ_PIXEL_DATA_RANGE_LENGTH_NV 0x887B
+#define GL_WRITE_PIXEL_DATA_RANGE_POINTER_NV 0x887C
+#define GL_READ_PIXEL_DATA_RANGE_POINTER_NV 0x887D
+#endif
+
+#ifndef GL_NV_primitive_restart
+#define GL_PRIMITIVE_RESTART_NV 0x8558
+#define GL_PRIMITIVE_RESTART_INDEX_NV 0x8559
+#endif
+
+#ifndef GL_NV_texture_expand_normal
+#define GL_TEXTURE_UNSIGNED_REMAP_MODE_NV 0x888F
+#endif
+
+#ifndef GL_NV_vertex_program2
+#endif
+
+#ifndef GL_ATI_map_object_buffer
+#endif
+
+#ifndef GL_ATI_separate_stencil
+#define GL_STENCIL_BACK_FUNC_ATI 0x8800
+#define GL_STENCIL_BACK_FAIL_ATI 0x8801
+#define GL_STENCIL_BACK_PASS_DEPTH_FAIL_ATI 0x8802
+#define GL_STENCIL_BACK_PASS_DEPTH_PASS_ATI 0x8803
+#endif
+
+#ifndef GL_ATI_vertex_attrib_array_object
+#endif
+
+#ifndef GL_OES_read_format
+#define GL_IMPLEMENTATION_COLOR_READ_TYPE_OES 0x8B9A
+#define GL_IMPLEMENTATION_COLOR_READ_FORMAT_OES 0x8B9B
+#endif
+
+#ifndef GL_EXT_depth_bounds_test
+#define GL_DEPTH_BOUNDS_TEST_EXT 0x8890
+#define GL_DEPTH_BOUNDS_EXT 0x8891
+#endif
+
+#ifndef GL_EXT_texture_mirror_clamp
+#define GL_MIRROR_CLAMP_EXT 0x8742
+#define GL_MIRROR_CLAMP_TO_EDGE_EXT 0x8743
+#define GL_MIRROR_CLAMP_TO_BORDER_EXT 0x8912
+#endif
+
+#ifndef GL_EXT_blend_equation_separate
+#define GL_BLEND_EQUATION_RGB_EXT GL_BLEND_EQUATION
+#define GL_BLEND_EQUATION_ALPHA_EXT 0x883D
+#endif
+
+#ifndef GL_MESA_pack_invert
+#define GL_PACK_INVERT_MESA 0x8758
+#endif
+
+#ifndef GL_MESA_ycbcr_texture
+#define GL_UNSIGNED_SHORT_8_8_MESA 0x85BA
+#define GL_UNSIGNED_SHORT_8_8_REV_MESA 0x85BB
+#define GL_YCBCR_MESA 0x8757
+#endif
+
+#ifndef GL_EXT_pixel_buffer_object
+#define GL_PIXEL_PACK_BUFFER_EXT 0x88EB
+#define GL_PIXEL_UNPACK_BUFFER_EXT 0x88EC
+#define GL_PIXEL_PACK_BUFFER_BINDING_EXT 0x88ED
+#define GL_PIXEL_UNPACK_BUFFER_BINDING_EXT 0x88EF
+#endif
+
+#ifndef GL_NV_fragment_program_option
+#endif
+
+#ifndef GL_NV_fragment_program2
+#define GL_MAX_PROGRAM_EXEC_INSTRUCTIONS_NV 0x88F4
+#define GL_MAX_PROGRAM_CALL_DEPTH_NV 0x88F5
+#define GL_MAX_PROGRAM_IF_DEPTH_NV 0x88F6
+#define GL_MAX_PROGRAM_LOOP_DEPTH_NV 0x88F7
+#define GL_MAX_PROGRAM_LOOP_COUNT_NV 0x88F8
+#endif
+
+#ifndef GL_NV_vertex_program2_option
+/* reuse GL_MAX_PROGRAM_EXEC_INSTRUCTIONS_NV */
+/* reuse GL_MAX_PROGRAM_CALL_DEPTH_NV */
+#endif
+
+#ifndef GL_NV_vertex_program3
+/* reuse GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS_ARB */
+#endif
+
+#ifndef GL_EXT_framebuffer_object
+#define GL_INVALID_FRAMEBUFFER_OPERATION_EXT 0x0506
+#define GL_MAX_RENDERBUFFER_SIZE_EXT 0x84E8
+#define GL_FRAMEBUFFER_BINDING_EXT 0x8CA6
+#define GL_RENDERBUFFER_BINDING_EXT 0x8CA7
+#define GL_FRAMEBUFFER_ATTACHMENT_TYPE_EXT 0x8CD0
+#define GL_FRAMEBUFFER_ATTACHMENT_NAME_EXT 0x8CD1
+#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL_EXT 0x8CD2
+#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE_EXT 0x8CD3
+#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_3D_ZOFFSET_EXT 0x8CD4
+#define GL_FRAMEBUFFER_COMPLETE_EXT 0x8CD5
+#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT_EXT 0x8CD6
+#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT 0x8CD7
+#define GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS_EXT 0x8CD9
+#define GL_FRAMEBUFFER_INCOMPLETE_FORMATS_EXT 0x8CDA
+#define GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER_EXT 0x8CDB
+#define GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER_EXT 0x8CDC
+#define GL_FRAMEBUFFER_UNSUPPORTED_EXT 0x8CDD
+#define GL_MAX_COLOR_ATTACHMENTS_EXT 0x8CDF
+#define GL_COLOR_ATTACHMENT0_EXT 0x8CE0
+#define GL_COLOR_ATTACHMENT1_EXT 0x8CE1
+#define GL_COLOR_ATTACHMENT2_EXT 0x8CE2
+#define GL_COLOR_ATTACHMENT3_EXT 0x8CE3
+#define GL_COLOR_ATTACHMENT4_EXT 0x8CE4
+#define GL_COLOR_ATTACHMENT5_EXT 0x8CE5
+#define GL_COLOR_ATTACHMENT6_EXT 0x8CE6
+#define GL_COLOR_ATTACHMENT7_EXT 0x8CE7
+#define GL_COLOR_ATTACHMENT8_EXT 0x8CE8
+#define GL_COLOR_ATTACHMENT9_EXT 0x8CE9
+#define GL_COLOR_ATTACHMENT10_EXT 0x8CEA
+#define GL_COLOR_ATTACHMENT11_EXT 0x8CEB
+#define GL_COLOR_ATTACHMENT12_EXT 0x8CEC
+#define GL_COLOR_ATTACHMENT13_EXT 0x8CED
+#define GL_COLOR_ATTACHMENT14_EXT 0x8CEE
+#define GL_COLOR_ATTACHMENT15_EXT 0x8CEF
+#define GL_DEPTH_ATTACHMENT_EXT 0x8D00
+#define GL_STENCIL_ATTACHMENT_EXT 0x8D20
+#define GL_FRAMEBUFFER_EXT 0x8D40
+#define GL_RENDERBUFFER_EXT 0x8D41
+#define GL_RENDERBUFFER_WIDTH_EXT 0x8D42
+#define GL_RENDERBUFFER_HEIGHT_EXT 0x8D43
+#define GL_RENDERBUFFER_INTERNAL_FORMAT_EXT 0x8D44
+#define GL_STENCIL_INDEX1_EXT 0x8D46
+#define GL_STENCIL_INDEX4_EXT 0x8D47
+#define GL_STENCIL_INDEX8_EXT 0x8D48
+#define GL_STENCIL_INDEX16_EXT 0x8D49
+#define GL_RENDERBUFFER_RED_SIZE_EXT 0x8D50
+#define GL_RENDERBUFFER_GREEN_SIZE_EXT 0x8D51
+#define GL_RENDERBUFFER_BLUE_SIZE_EXT 0x8D52
+#define GL_RENDERBUFFER_ALPHA_SIZE_EXT 0x8D53
+#define GL_RENDERBUFFER_DEPTH_SIZE_EXT 0x8D54
+#define GL_RENDERBUFFER_STENCIL_SIZE_EXT 0x8D55
+#endif
+
+#ifndef GL_GREMEDY_string_marker
+#endif
+
+#ifndef GL_EXT_packed_depth_stencil
+#define GL_DEPTH_STENCIL_EXT 0x84F9
+#define GL_UNSIGNED_INT_24_8_EXT 0x84FA
+#define GL_DEPTH24_STENCIL8_EXT 0x88F0
+#define GL_TEXTURE_STENCIL_SIZE_EXT 0x88F1
+#endif
+
+#ifndef GL_EXT_stencil_clear_tag
+#define GL_STENCIL_TAG_BITS_EXT 0x88F2
+#define GL_STENCIL_CLEAR_TAG_VALUE_EXT 0x88F3
+#endif
+
+#ifndef GL_EXT_texture_sRGB
+#define GL_SRGB_EXT 0x8C40
+#define GL_SRGB8_EXT 0x8C41
+#define GL_SRGB_ALPHA_EXT 0x8C42
+#define GL_SRGB8_ALPHA8_EXT 0x8C43
+#define GL_SLUMINANCE_ALPHA_EXT 0x8C44
+#define GL_SLUMINANCE8_ALPHA8_EXT 0x8C45
+#define GL_SLUMINANCE_EXT 0x8C46
+#define GL_SLUMINANCE8_EXT 0x8C47
+#define GL_COMPRESSED_SRGB_EXT 0x8C48
+#define GL_COMPRESSED_SRGB_ALPHA_EXT 0x8C49
+#define GL_COMPRESSED_SLUMINANCE_EXT 0x8C4A
+#define GL_COMPRESSED_SLUMINANCE_ALPHA_EXT 0x8C4B
+#define GL_COMPRESSED_SRGB_S3TC_DXT1_EXT 0x8C4C
+#define GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT 0x8C4D
+#define GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT 0x8C4E
+#define GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT 0x8C4F
+#endif
+
+#ifndef GL_EXT_framebuffer_blit
+#define GL_READ_FRAMEBUFFER_EXT 0x8CA8
+#define GL_DRAW_FRAMEBUFFER_EXT 0x8CA9
+#define GL_DRAW_FRAMEBUFFER_BINDING_EXT GL_FRAMEBUFFER_BINDING_EXT
+#define GL_READ_FRAMEBUFFER_BINDING_EXT 0x8CAA
+#endif
+
+#ifndef GL_EXT_framebuffer_multisample
+#define GL_RENDERBUFFER_SAMPLES_EXT 0x8CAB
+#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE_EXT 0x8D56
+#define GL_MAX_SAMPLES_EXT 0x8D57
+#endif
+
+#ifndef GL_MESAX_texture_stack
+#define GL_TEXTURE_1D_STACK_MESAX 0x8759
+#define GL_TEXTURE_2D_STACK_MESAX 0x875A
+#define GL_PROXY_TEXTURE_1D_STACK_MESAX 0x875B
+#define GL_PROXY_TEXTURE_2D_STACK_MESAX 0x875C
+#define GL_TEXTURE_1D_STACK_BINDING_MESAX 0x875D
+#define GL_TEXTURE_2D_STACK_BINDING_MESAX 0x875E
+#endif
+
+#ifndef GL_EXT_timer_query
+#define GL_TIME_ELAPSED_EXT 0x88BF
+#endif
+
+#ifndef GL_EXT_gpu_program_parameters
+#endif
+
+#ifndef GL_APPLE_flush_buffer_range
+#define GL_BUFFER_SERIALIZED_MODIFY_APPLE 0x8A12
+#define GL_BUFFER_FLUSHING_UNMAP_APPLE 0x8A13
+#endif
+
+#ifndef GL_NV_gpu_program4
+#define GL_MIN_PROGRAM_TEXEL_OFFSET_NV 0x8904
+#define GL_MAX_PROGRAM_TEXEL_OFFSET_NV 0x8905
+#define GL_PROGRAM_ATTRIB_COMPONENTS_NV 0x8906
+#define GL_PROGRAM_RESULT_COMPONENTS_NV 0x8907
+#define GL_MAX_PROGRAM_ATTRIB_COMPONENTS_NV 0x8908
+#define GL_MAX_PROGRAM_RESULT_COMPONENTS_NV 0x8909
+#define GL_MAX_PROGRAM_GENERIC_ATTRIBS_NV 0x8DA5
+#define GL_MAX_PROGRAM_GENERIC_RESULTS_NV 0x8DA6
+#endif
+
+#ifndef GL_NV_geometry_program4
+#define GL_LINES_ADJACENCY_EXT 0x000A
+#define GL_LINE_STRIP_ADJACENCY_EXT 0x000B
+#define GL_TRIANGLES_ADJACENCY_EXT 0x000C
+#define GL_TRIANGLE_STRIP_ADJACENCY_EXT 0x000D
+#define GL_GEOMETRY_PROGRAM_NV 0x8C26
+#define GL_MAX_PROGRAM_OUTPUT_VERTICES_NV 0x8C27
+#define GL_MAX_PROGRAM_TOTAL_OUTPUT_COMPONENTS_NV 0x8C28
+#define GL_GEOMETRY_VERTICES_OUT_EXT 0x8DDA
+#define GL_GEOMETRY_INPUT_TYPE_EXT 0x8DDB
+#define GL_GEOMETRY_OUTPUT_TYPE_EXT 0x8DDC
+#define GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS_EXT 0x8C29
+#define GL_FRAMEBUFFER_ATTACHMENT_LAYERED_EXT 0x8DA7
+#define GL_FRAMEBUFFER_INCOMPLETE_LAYER_TARGETS_EXT 0x8DA8
+#define GL_FRAMEBUFFER_INCOMPLETE_LAYER_COUNT_EXT 0x8DA9
+#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LAYER_EXT 0x8CD4
+#define GL_PROGRAM_POINT_SIZE_EXT 0x8642
+#endif
+
+#ifndef GL_EXT_geometry_shader4
+#define GL_GEOMETRY_SHADER_EXT 0x8DD9
+/* reuse GL_GEOMETRY_VERTICES_OUT_EXT */
+/* reuse GL_GEOMETRY_INPUT_TYPE_EXT */
+/* reuse GL_GEOMETRY_OUTPUT_TYPE_EXT */
+/* reuse GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS_EXT */
+#define GL_MAX_GEOMETRY_VARYING_COMPONENTS_EXT 0x8DDD
+#define GL_MAX_VERTEX_VARYING_COMPONENTS_EXT 0x8DDE
+#define GL_MAX_VARYING_COMPONENTS_EXT 0x8B4B
+#define GL_MAX_GEOMETRY_UNIFORM_COMPONENTS_EXT 0x8DDF
+#define GL_MAX_GEOMETRY_OUTPUT_VERTICES_EXT 0x8DE0
+#define GL_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS_EXT 0x8DE1
+/* reuse GL_LINES_ADJACENCY_EXT */
+/* reuse GL_LINE_STRIP_ADJACENCY_EXT */
+/* reuse GL_TRIANGLES_ADJACENCY_EXT */
+/* reuse GL_TRIANGLE_STRIP_ADJACENCY_EXT */
+/* reuse GL_FRAMEBUFFER_INCOMPLETE_LAYER_TARGETS_EXT */
+/* reuse GL_FRAMEBUFFER_INCOMPLETE_LAYER_COUNT_EXT */
+/* reuse GL_FRAMEBUFFER_ATTACHMENT_LAYERED_EXT */
+/* reuse GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LAYER_EXT */
+/* reuse GL_PROGRAM_POINT_SIZE_EXT */
+#endif
+
+#ifndef GL_NV_vertex_program4
+#define GL_VERTEX_ATTRIB_ARRAY_INTEGER_NV 0x88FD
+#endif
+
+#ifndef GL_EXT_gpu_shader4
+#define GL_SAMPLER_1D_ARRAY_EXT 0x8DC0
+#define GL_SAMPLER_2D_ARRAY_EXT 0x8DC1
+#define GL_SAMPLER_BUFFER_EXT 0x8DC2
+#define GL_SAMPLER_1D_ARRAY_SHADOW_EXT 0x8DC3
+#define GL_SAMPLER_2D_ARRAY_SHADOW_EXT 0x8DC4
+#define GL_SAMPLER_CUBE_SHADOW_EXT 0x8DC5
+#define GL_UNSIGNED_INT_VEC2_EXT 0x8DC6
+#define GL_UNSIGNED_INT_VEC3_EXT 0x8DC7
+#define GL_UNSIGNED_INT_VEC4_EXT 0x8DC8
+#define GL_INT_SAMPLER_1D_EXT 0x8DC9
+#define GL_INT_SAMPLER_2D_EXT 0x8DCA
+#define GL_INT_SAMPLER_3D_EXT 0x8DCB
+#define GL_INT_SAMPLER_CUBE_EXT 0x8DCC
+#define GL_INT_SAMPLER_2D_RECT_EXT 0x8DCD
+#define GL_INT_SAMPLER_1D_ARRAY_EXT 0x8DCE
+#define GL_INT_SAMPLER_2D_ARRAY_EXT 0x8DCF
+#define GL_INT_SAMPLER_BUFFER_EXT 0x8DD0
+#define GL_UNSIGNED_INT_SAMPLER_1D_EXT 0x8DD1
+#define GL_UNSIGNED_INT_SAMPLER_2D_EXT 0x8DD2
+#define GL_UNSIGNED_INT_SAMPLER_3D_EXT 0x8DD3
+#define GL_UNSIGNED_INT_SAMPLER_CUBE_EXT 0x8DD4
+#define GL_UNSIGNED_INT_SAMPLER_2D_RECT_EXT 0x8DD5
+#define GL_UNSIGNED_INT_SAMPLER_1D_ARRAY_EXT 0x8DD6
+#define GL_UNSIGNED_INT_SAMPLER_2D_ARRAY_EXT 0x8DD7
+#define GL_UNSIGNED_INT_SAMPLER_BUFFER_EXT 0x8DD8
+#endif
+
+#ifndef GL_EXT_draw_instanced
+#endif
+
+#ifndef GL_EXT_packed_float
+#define GL_R11F_G11F_B10F_EXT 0x8C3A
+#define GL_UNSIGNED_INT_10F_11F_11F_REV_EXT 0x8C3B
+#define GL_RGBA_SIGNED_COMPONENTS_EXT 0x8C3C
+#endif
+
+#ifndef GL_EXT_texture_array
+#define GL_TEXTURE_1D_ARRAY_EXT 0x8C18
+#define GL_PROXY_TEXTURE_1D_ARRAY_EXT 0x8C19
+#define GL_TEXTURE_2D_ARRAY_EXT 0x8C1A
+#define GL_PROXY_TEXTURE_2D_ARRAY_EXT 0x8C1B
+#define GL_TEXTURE_BINDING_1D_ARRAY_EXT 0x8C1C
+#define GL_TEXTURE_BINDING_2D_ARRAY_EXT 0x8C1D
+#define GL_MAX_ARRAY_TEXTURE_LAYERS_EXT 0x88FF
+#define GL_COMPARE_REF_DEPTH_TO_TEXTURE_EXT 0x884E
+/* reuse GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LAYER_EXT */
+#endif
+
+#ifndef GL_EXT_texture_buffer_object
+#define GL_TEXTURE_BUFFER_EXT 0x8C2A
+#define GL_MAX_TEXTURE_BUFFER_SIZE_EXT 0x8C2B
+#define GL_TEXTURE_BINDING_BUFFER_EXT 0x8C2C
+#define GL_TEXTURE_BUFFER_DATA_STORE_BINDING_EXT 0x8C2D
+#define GL_TEXTURE_BUFFER_FORMAT_EXT 0x8C2E
+#endif
+
+#ifndef GL_EXT_texture_compression_latc
+#define GL_COMPRESSED_LUMINANCE_LATC1_EXT 0x8C70
+#define GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT 0x8C71
+#define GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT 0x8C72
+#define GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT 0x8C73
+#endif
+
+#ifndef GL_EXT_texture_compression_rgtc
+#define GL_COMPRESSED_RED_RGTC1_EXT 0x8DBB
+#define GL_COMPRESSED_SIGNED_RED_RGTC1_EXT 0x8DBC
+#define GL_COMPRESSED_RED_GREEN_RGTC2_EXT 0x8DBD
+#define GL_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT 0x8DBE
+#endif
+
+#ifndef GL_EXT_texture_shared_exponent
+#define GL_RGB9_E5_EXT 0x8C3D
+#define GL_UNSIGNED_INT_5_9_9_9_REV_EXT 0x8C3E
+#define GL_TEXTURE_SHARED_SIZE_EXT 0x8C3F
+#endif
+
+#ifndef GL_NV_depth_buffer_float
+#define GL_DEPTH_COMPONENT32F_NV 0x8DAB
+#define GL_DEPTH32F_STENCIL8_NV 0x8DAC
+#define GL_FLOAT_32_UNSIGNED_INT_24_8_REV_NV 0x8DAD
+#define GL_DEPTH_BUFFER_FLOAT_MODE_NV 0x8DAF
+#endif
+
+#ifndef GL_NV_fragment_program4
+#endif
+
+#ifndef GL_NV_framebuffer_multisample_coverage
+#define GL_RENDERBUFFER_COVERAGE_SAMPLES_NV 0x8CAB
+#define GL_RENDERBUFFER_COLOR_SAMPLES_NV 0x8E10
+#define GL_MAX_MULTISAMPLE_COVERAGE_MODES_NV 0x8E11
+#define GL_MULTISAMPLE_COVERAGE_MODES_NV 0x8E12
+#endif
+
+#ifndef GL_EXT_framebuffer_sRGB
+#define GL_FRAMEBUFFER_SRGB_EXT 0x8DB9
+#define GL_FRAMEBUFFER_SRGB_CAPABLE_EXT 0x8DBA
+#endif
+
+#ifndef GL_NV_geometry_shader4
+#endif
+
+#ifndef GL_NV_parameter_buffer_object
+#define GL_MAX_PROGRAM_PARAMETER_BUFFER_BINDINGS_NV 0x8DA0
+#define GL_MAX_PROGRAM_PARAMETER_BUFFER_SIZE_NV 0x8DA1
+#define GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV 0x8DA2
+#define GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV 0x8DA3
+#define GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV 0x8DA4
+#endif
+
+#ifndef GL_EXT_draw_buffers2
+#endif
+
+#ifndef GL_NV_transform_feedback
+#define GL_BACK_PRIMARY_COLOR_NV 0x8C77
+#define GL_BACK_SECONDARY_COLOR_NV 0x8C78
+#define GL_TEXTURE_COORD_NV 0x8C79
+#define GL_CLIP_DISTANCE_NV 0x8C7A
+#define GL_VERTEX_ID_NV 0x8C7B
+#define GL_PRIMITIVE_ID_NV 0x8C7C
+#define GL_GENERIC_ATTRIB_NV 0x8C7D
+#define GL_TRANSFORM_FEEDBACK_ATTRIBS_NV 0x8C7E
+#define GL_TRANSFORM_FEEDBACK_BUFFER_MODE_NV 0x8C7F
+#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_NV 0x8C80
+#define GL_ACTIVE_VARYINGS_NV 0x8C81
+#define GL_ACTIVE_VARYING_MAX_LENGTH_NV 0x8C82
+#define GL_TRANSFORM_FEEDBACK_VARYINGS_NV 0x8C83
+#define GL_TRANSFORM_FEEDBACK_BUFFER_START_NV 0x8C84
+#define GL_TRANSFORM_FEEDBACK_BUFFER_SIZE_NV 0x8C85
+#define GL_TRANSFORM_FEEDBACK_RECORD_NV 0x8C86
+#define GL_PRIMITIVES_GENERATED_NV 0x8C87
+#define GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN_NV 0x8C88
+#define GL_RASTERIZER_DISCARD_NV 0x8C89
+#define GL_MAX_TRANSFORM_FEEDBACK_INTERLEAVED_ATTRIBS_NV 0x8C8A
+#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS_NV 0x8C8B
+#define GL_INTERLEAVED_ATTRIBS_NV 0x8C8C
+#define GL_SEPARATE_ATTRIBS_NV 0x8C8D
+#define GL_TRANSFORM_FEEDBACK_BUFFER_NV 0x8C8E
+#define GL_TRANSFORM_FEEDBACK_BUFFER_BINDING_NV 0x8C8F
+#endif
+
+#ifndef GL_EXT_bindable_uniform
+#define GL_MAX_VERTEX_BINDABLE_UNIFORMS_EXT 0x8DE2
+#define GL_MAX_FRAGMENT_BINDABLE_UNIFORMS_EXT 0x8DE3
+#define GL_MAX_GEOMETRY_BINDABLE_UNIFORMS_EXT 0x8DE4
+#define GL_MAX_BINDABLE_UNIFORM_SIZE_EXT 0x8DED
+#define GL_UNIFORM_BUFFER_EXT 0x8DEE
+#define GL_UNIFORM_BUFFER_BINDING_EXT 0x8DEF
+#endif
+
+#ifndef GL_EXT_texture_integer
+#define GL_RGBA32UI_EXT 0x8D70
+#define GL_RGB32UI_EXT 0x8D71
+#define GL_ALPHA32UI_EXT 0x8D72
+#define GL_INTENSITY32UI_EXT 0x8D73
+#define GL_LUMINANCE32UI_EXT 0x8D74
+#define GL_LUMINANCE_ALPHA32UI_EXT 0x8D75
+#define GL_RGBA16UI_EXT 0x8D76
+#define GL_RGB16UI_EXT 0x8D77
+#define GL_ALPHA16UI_EXT 0x8D78
+#define GL_INTENSITY16UI_EXT 0x8D79
+#define GL_LUMINANCE16UI_EXT 0x8D7A
+#define GL_LUMINANCE_ALPHA16UI_EXT 0x8D7B
+#define GL_RGBA8UI_EXT 0x8D7C
+#define GL_RGB8UI_EXT 0x8D7D
+#define GL_ALPHA8UI_EXT 0x8D7E
+#define GL_INTENSITY8UI_EXT 0x8D7F
+#define GL_LUMINANCE8UI_EXT 0x8D80
+#define GL_LUMINANCE_ALPHA8UI_EXT 0x8D81
+#define GL_RGBA32I_EXT 0x8D82
+#define GL_RGB32I_EXT 0x8D83
+#define GL_ALPHA32I_EXT 0x8D84
+#define GL_INTENSITY32I_EXT 0x8D85
+#define GL_LUMINANCE32I_EXT 0x8D86
+#define GL_LUMINANCE_ALPHA32I_EXT 0x8D87
+#define GL_RGBA16I_EXT 0x8D88
+#define GL_RGB16I_EXT 0x8D89
+#define GL_ALPHA16I_EXT 0x8D8A
+#define GL_INTENSITY16I_EXT 0x8D8B
+#define GL_LUMINANCE16I_EXT 0x8D8C
+#define GL_LUMINANCE_ALPHA16I_EXT 0x8D8D
+#define GL_RGBA8I_EXT 0x8D8E
+#define GL_RGB8I_EXT 0x8D8F
+#define GL_ALPHA8I_EXT 0x8D90
+#define GL_INTENSITY8I_EXT 0x8D91
+#define GL_LUMINANCE8I_EXT 0x8D92
+#define GL_LUMINANCE_ALPHA8I_EXT 0x8D93
+#define GL_RED_INTEGER_EXT 0x8D94
+#define GL_GREEN_INTEGER_EXT 0x8D95
+#define GL_BLUE_INTEGER_EXT 0x8D96
+#define GL_ALPHA_INTEGER_EXT 0x8D97
+#define GL_RGB_INTEGER_EXT 0x8D98
+#define GL_RGBA_INTEGER_EXT 0x8D99
+#define GL_BGR_INTEGER_EXT 0x8D9A
+#define GL_BGRA_INTEGER_EXT 0x8D9B
+#define GL_LUMINANCE_INTEGER_EXT 0x8D9C
+#define GL_LUMINANCE_ALPHA_INTEGER_EXT 0x8D9D
+#define GL_RGBA_INTEGER_MODE_EXT 0x8D9E
+#endif
+
+#ifndef GL_GREMEDY_frame_terminator
+#endif
+
+#ifndef GL_NV_conditional_render
+#define GL_QUERY_WAIT_NV 0x8E13
+#define GL_QUERY_NO_WAIT_NV 0x8E14
+#define GL_QUERY_BY_REGION_WAIT_NV 0x8E15
+#define GL_QUERY_BY_REGION_NO_WAIT_NV 0x8E16
+#endif
+
+#ifndef GL_NV_present_video
+#define GL_FRAME_NV 0x8E26
+#define GL_FIELDS_NV 0x8E27
+#define GL_CURRENT_TIME_NV 0x8E28
+#define GL_NUM_FILL_STREAMS_NV 0x8E29
+#define GL_PRESENT_TIME_NV 0x8E2A
+#define GL_PRESENT_DURATION_NV 0x8E2B
+#endif
+
+#ifndef GL_EXT_transform_feedback
+#define GL_TRANSFORM_FEEDBACK_BUFFER_EXT 0x8C8E
+#define GL_TRANSFORM_FEEDBACK_BUFFER_START_EXT 0x8C84
+#define GL_TRANSFORM_FEEDBACK_BUFFER_SIZE_EXT 0x8C85
+#define GL_TRANSFORM_FEEDBACK_BUFFER_BINDING_EXT 0x8C8F
+#define GL_INTERLEAVED_ATTRIBS_EXT 0x8C8C
+#define GL_SEPARATE_ATTRIBS_EXT 0x8C8D
+#define GL_PRIMITIVES_GENERATED_EXT 0x8C87
+#define GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN_EXT 0x8C88
+#define GL_RASTERIZER_DISCARD_EXT 0x8C89
+#define GL_MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT 0x8C8A
+#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS_EXT 0x8C8B
+#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT 0x8C80
+#define GL_TRANSFORM_FEEDBACK_VARYINGS_EXT 0x8C83
+#define GL_TRANSFORM_FEEDBACK_BUFFER_MODE_EXT 0x8C7F
+#define GL_TRANSFORM_FEEDBACK_VARYING_MAX_LENGTH_EXT 0x8C76
+#endif
+
+#ifndef GL_EXT_direct_state_access
+#define GL_PROGRAM_MATRIX_EXT 0x8E2D
+#define GL_TRANSPOSE_PROGRAM_MATRIX_EXT 0x8E2E
+#define GL_PROGRAM_MATRIX_STACK_DEPTH_EXT 0x8E2F
+#endif
+
+#ifndef GL_EXT_vertex_array_bgra
+/* reuse GL_BGRA */
+#endif
+
+#ifndef GL_EXT_texture_swizzle
+#define GL_TEXTURE_SWIZZLE_R_EXT 0x8E42
+#define GL_TEXTURE_SWIZZLE_G_EXT 0x8E43
+#define GL_TEXTURE_SWIZZLE_B_EXT 0x8E44
+#define GL_TEXTURE_SWIZZLE_A_EXT 0x8E45
+#define GL_TEXTURE_SWIZZLE_RGBA_EXT 0x8E46
+#endif
+
+#ifndef GL_NV_explicit_multisample
+#define GL_SAMPLE_POSITION_NV 0x8E50
+#define GL_SAMPLE_MASK_NV 0x8E51
+#define GL_SAMPLE_MASK_VALUE_NV 0x8E52
+#define GL_TEXTURE_BINDING_RENDERBUFFER_NV 0x8E53
+#define GL_TEXTURE_RENDERBUFFER_DATA_STORE_BINDING_NV 0x8E54
+#define GL_MAX_SAMPLE_MASK_WORDS_NV 0x8E59
+#define GL_TEXTURE_RENDERBUFFER_NV 0x8E55
+#define GL_SAMPLER_RENDERBUFFER_NV 0x8E56
+#define GL_INT_SAMPLER_RENDERBUFFER_NV 0x8E57
+#define GL_UNSIGNED_INT_SAMPLER_RENDERBUFFER_NV 0x8E58
+#endif
+
+#ifndef GL_NV_transform_feedback2
+#define GL_TRANSFORM_FEEDBACK_NV 0x8E22
+#define GL_TRANSFORM_FEEDBACK_BUFFER_PAUSED_NV 0x8E23
+#define GL_TRANSFORM_FEEDBACK_BUFFER_ACTIVE_NV 0x8E24
+#define GL_TRANSFORM_FEEDBACK_BINDING_NV 0x8E25
+#endif
+
+#ifndef GL_ATI_meminfo
+#define GL_VBO_FREE_MEMORY_ATI 0x87FB
+#define GL_TEXTURE_FREE_MEMORY_ATI 0x87FC
+#define GL_RENDERBUFFER_FREE_MEMORY_ATI 0x87FD
+#endif
+
+#ifndef GL_AMD_performance_monitor
+#define GL_COUNTER_TYPE_AMD 0x8BC0
+#define GL_COUNTER_RANGE_AMD 0x8BC1
+#define GL_UNSIGNED_INT64_AMD 0x8BC2
+#define GL_PERCENTAGE_AMD 0x8BC3
+#define GL_PERFMON_RESULT_AVAILABLE_AMD 0x8BC4
+#define GL_PERFMON_RESULT_SIZE_AMD 0x8BC5
+#define GL_PERFMON_RESULT_AMD 0x8BC6
+#endif
+
+#ifndef GL_AMD_texture_texture4
+#endif
+
+#ifndef GL_AMD_vertex_shader_tesselator
+#define GL_SAMPLER_BUFFER_AMD 0x9001
+#define GL_INT_SAMPLER_BUFFER_AMD 0x9002
+#define GL_UNSIGNED_INT_SAMPLER_BUFFER_AMD 0x9003
+#define GL_TESSELLATION_MODE_AMD 0x9004
+#define GL_TESSELLATION_FACTOR_AMD 0x9005
+#define GL_DISCRETE_AMD 0x9006
+#define GL_CONTINUOUS_AMD 0x9007
+#endif
+
+#ifndef GL_EXT_provoking_vertex
+#define GL_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION_EXT 0x8E4C
+#define GL_FIRST_VERTEX_CONVENTION_EXT 0x8E4D
+#define GL_LAST_VERTEX_CONVENTION_EXT 0x8E4E
+#define GL_PROVOKING_VERTEX_EXT 0x8E4F
+#endif
+
+
+/*************************************************************/
+
+#include <stddef.h>
+#ifndef GL_VERSION_2_0
+/* GL type for program/shader text */
+typedef char GLchar; /* native character */
+#endif
+
+#ifndef GL_VERSION_1_5
+/* GL types for handling large vertex buffer objects */
+typedef ptrdiff_t GLintptr;
+typedef ptrdiff_t GLsizeiptr;
+#endif
+
+#ifndef GL_ARB_vertex_buffer_object
+/* GL types for handling large vertex buffer objects */
+typedef ptrdiff_t GLintptrARB;
+typedef ptrdiff_t GLsizeiptrARB;
+#endif
+
+#ifndef GL_ARB_shader_objects
+/* GL types for handling shader object handles and program/shader text */
+typedef char GLcharARB; /* native character */
+typedef unsigned int GLhandleARB; /* shader object handle */
+#endif
+
+/* GL types for "half" precision (s10e5) float data in host memory */
+#ifndef GL_ARB_half_float_pixel
+typedef unsigned short GLhalfARB;
+#endif
+
+#ifndef GL_NV_half_float
+typedef unsigned short GLhalfNV;
+#endif
+
+#ifndef GLEXT_64_TYPES_DEFINED
+/* This code block is duplicated in glxext.h, so must be protected */
+#define GLEXT_64_TYPES_DEFINED
+/* Define int32_t, int64_t, and uint64_t types for UST/MSC */
+/* (as used in the GL_EXT_timer_query extension). */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#include <inttypes.h>
+#elif defined(__sun__) || defined(__digital__)
+#include <inttypes.h>
+#if defined(__STDC__)
+#if defined(__arch64__) || defined(_LP64)
+typedef long int int64_t;
+typedef unsigned long int uint64_t;
+#else
+typedef long long int int64_t;
+typedef unsigned long long int uint64_t;
+#endif /* __arch64__ */
+#endif /* __STDC__ */
+#elif defined( __VMS ) || defined(__sgi)
+#include <inttypes.h>
+#elif defined(__SCO__) || defined(__USLC__)
+#include <stdint.h>
+#elif defined(__UNIXOS2__) || defined(__SOL64__)
+typedef long int int32_t;
+typedef long long int int64_t;
+typedef unsigned long long int uint64_t;
+#elif defined(_WIN32) && defined(__GNUC__)
+#include <stdint.h>
+#elif defined(_WIN32)
+typedef __int32 int32_t;
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+#else
+#include <inttypes.h> /* Fallback option */
+#endif
+#endif
+
+#ifndef GL_EXT_timer_query
+typedef int64_t GLint64EXT;
+typedef uint64_t GLuint64EXT;
+#endif
+
+#ifndef GL_VERSION_1_2
+#define GL_VERSION_1_2 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBlendColor (GLclampf, GLclampf, GLclampf, GLclampf);
+GLAPI void APIENTRY glBlendEquation (GLenum);
+GLAPI void APIENTRY glDrawRangeElements (GLenum, GLuint, GLuint, GLsizei, GLenum, const GLvoid *);
+GLAPI void APIENTRY glColorTable (GLenum, GLenum, GLsizei, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glColorTableParameterfv (GLenum, GLenum, const GLfloat *);
+GLAPI void APIENTRY glColorTableParameteriv (GLenum, GLenum, const GLint *);
+GLAPI void APIENTRY glCopyColorTable (GLenum, GLenum, GLint, GLint, GLsizei);
+GLAPI void APIENTRY glGetColorTable (GLenum, GLenum, GLenum, GLvoid *);
+GLAPI void APIENTRY glGetColorTableParameterfv (GLenum, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetColorTableParameteriv (GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glColorSubTable (GLenum, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glCopyColorSubTable (GLenum, GLsizei, GLint, GLint, GLsizei);
+GLAPI void APIENTRY glConvolutionFilter1D (GLenum, GLenum, GLsizei, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glConvolutionFilter2D (GLenum, GLenum, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glConvolutionParameterf (GLenum, GLenum, GLfloat);
+GLAPI void APIENTRY glConvolutionParameterfv (GLenum, GLenum, const GLfloat *);
+GLAPI void APIENTRY glConvolutionParameteri (GLenum, GLenum, GLint);
+GLAPI void APIENTRY glConvolutionParameteriv (GLenum, GLenum, const GLint *);
+GLAPI void APIENTRY glCopyConvolutionFilter1D (GLenum, GLenum, GLint, GLint, GLsizei);
+GLAPI void APIENTRY glCopyConvolutionFilter2D (GLenum, GLenum, GLint, GLint, GLsizei, GLsizei);
+GLAPI void APIENTRY glGetConvolutionFilter (GLenum, GLenum, GLenum, GLvoid *);
+GLAPI void APIENTRY glGetConvolutionParameterfv (GLenum, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetConvolutionParameteriv (GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetSeparableFilter (GLenum, GLenum, GLenum, GLvoid *, GLvoid *, GLvoid *);
+GLAPI void APIENTRY glSeparableFilter2D (GLenum, GLenum, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *, const GLvoid *);
+GLAPI void APIENTRY glGetHistogram (GLenum, GLboolean, GLenum, GLenum, GLvoid *);
+GLAPI void APIENTRY glGetHistogramParameterfv (GLenum, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetHistogramParameteriv (GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetMinmax (GLenum, GLboolean, GLenum, GLenum, GLvoid *);
+GLAPI void APIENTRY glGetMinmaxParameterfv (GLenum, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetMinmaxParameteriv (GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glHistogram (GLenum, GLsizei, GLenum, GLboolean);
+GLAPI void APIENTRY glMinmax (GLenum, GLenum, GLboolean);
+GLAPI void APIENTRY glResetHistogram (GLenum);
+GLAPI void APIENTRY glResetMinmax (GLenum);
+GLAPI void APIENTRY glTexImage3D (GLenum, GLint, GLint, GLsizei, GLsizei, GLsizei, GLint, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glTexSubImage3D (GLenum, GLint, GLint, GLint, GLint, GLsizei, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glCopyTexSubImage3D (GLenum, GLint, GLint, GLint, GLint, GLint, GLint, GLsizei, GLsizei);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLBLENDCOLORPROC) (GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha);
+typedef void (APIENTRYP PFNGLBLENDEQUATIONPROC) (GLenum mode);
+typedef void (APIENTRYP PFNGLDRAWRANGEELEMENTSPROC) (GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices);
+typedef void (APIENTRYP PFNGLCOLORTABLEPROC) (GLenum target, GLenum internalformat, GLsizei width, GLenum format, GLenum type, const GLvoid *table);
+typedef void (APIENTRYP PFNGLCOLORTABLEPARAMETERFVPROC) (GLenum target, GLenum pname, const GLfloat *params);
+typedef void (APIENTRYP PFNGLCOLORTABLEPARAMETERIVPROC) (GLenum target, GLenum pname, const GLint *params);
+typedef void (APIENTRYP PFNGLCOPYCOLORTABLEPROC) (GLenum target, GLenum internalformat, GLint x, GLint y, GLsizei width);
+typedef void (APIENTRYP PFNGLGETCOLORTABLEPROC) (GLenum target, GLenum format, GLenum type, GLvoid *table);
+typedef void (APIENTRYP PFNGLGETCOLORTABLEPARAMETERFVPROC) (GLenum target, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETCOLORTABLEPARAMETERIVPROC) (GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLCOLORSUBTABLEPROC) (GLenum target, GLsizei start, GLsizei count, GLenum format, GLenum type, const GLvoid *data);
+typedef void (APIENTRYP PFNGLCOPYCOLORSUBTABLEPROC) (GLenum target, GLsizei start, GLint x, GLint y, GLsizei width);
+typedef void (APIENTRYP PFNGLCONVOLUTIONFILTER1DPROC) (GLenum target, GLenum internalformat, GLsizei width, GLenum format, GLenum type, const GLvoid *image);
+typedef void (APIENTRYP PFNGLCONVOLUTIONFILTER2DPROC) (GLenum target, GLenum internalformat, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *image);
+typedef void (APIENTRYP PFNGLCONVOLUTIONPARAMETERFPROC) (GLenum target, GLenum pname, GLfloat params);
+typedef void (APIENTRYP PFNGLCONVOLUTIONPARAMETERFVPROC) (GLenum target, GLenum pname, const GLfloat *params);
+typedef void (APIENTRYP PFNGLCONVOLUTIONPARAMETERIPROC) (GLenum target, GLenum pname, GLint params);
+typedef void (APIENTRYP PFNGLCONVOLUTIONPARAMETERIVPROC) (GLenum target, GLenum pname, const GLint *params);
+typedef void (APIENTRYP PFNGLCOPYCONVOLUTIONFILTER1DPROC) (GLenum target, GLenum internalformat, GLint x, GLint y, GLsizei width);
+typedef void (APIENTRYP PFNGLCOPYCONVOLUTIONFILTER2DPROC) (GLenum target, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height);
+typedef void (APIENTRYP PFNGLGETCONVOLUTIONFILTERPROC) (GLenum target, GLenum format, GLenum type, GLvoid *image);
+typedef void (APIENTRYP PFNGLGETCONVOLUTIONPARAMETERFVPROC) (GLenum target, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETCONVOLUTIONPARAMETERIVPROC) (GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETSEPARABLEFILTERPROC) (GLenum target, GLenum format, GLenum type, GLvoid *row, GLvoid *column, GLvoid *span);
+typedef void (APIENTRYP PFNGLSEPARABLEFILTER2DPROC) (GLenum target, GLenum internalformat, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *row, const GLvoid *column);
+typedef void (APIENTRYP PFNGLGETHISTOGRAMPROC) (GLenum target, GLboolean reset, GLenum format, GLenum type, GLvoid *values);
+typedef void (APIENTRYP PFNGLGETHISTOGRAMPARAMETERFVPROC) (GLenum target, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETHISTOGRAMPARAMETERIVPROC) (GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETMINMAXPROC) (GLenum target, GLboolean reset, GLenum format, GLenum type, GLvoid *values);
+typedef void (APIENTRYP PFNGLGETMINMAXPARAMETERFVPROC) (GLenum target, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETMINMAXPARAMETERIVPROC) (GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLHISTOGRAMPROC) (GLenum target, GLsizei width, GLenum internalformat, GLboolean sink);
+typedef void (APIENTRYP PFNGLMINMAXPROC) (GLenum target, GLenum internalformat, GLboolean sink);
+typedef void (APIENTRYP PFNGLRESETHISTOGRAMPROC) (GLenum target);
+typedef void (APIENTRYP PFNGLRESETMINMAXPROC) (GLenum target);
+typedef void (APIENTRYP PFNGLTEXIMAGE3DPROC) (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const GLvoid *pixels);
+typedef void (APIENTRYP PFNGLTEXSUBIMAGE3DPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const GLvoid *pixels);
+typedef void (APIENTRYP PFNGLCOPYTEXSUBIMAGE3DPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height);
+#endif
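
Aside: the GLAPI prototypes above are only visible when the application defines
GL_GLEXT_PROTOTYPES; most code instead uses the PFNGL...PROC typedefs and fetches the
entry points at run time. A minimal GLX-flavoured sketch, with a hypothetical
load_gl12() helper and glXGetProcAddressARB assumed available:

    #include <GL/glx.h>     /* for glXGetProcAddressARB */
    #include <stdio.h>

    static PFNGLDRAWRANGEELEMENTSPROC p_glDrawRangeElements;

    static int load_gl12(void)
    {
        p_glDrawRangeElements = (PFNGLDRAWRANGEELEMENTSPROC)
            glXGetProcAddressARB((const GLubyte *)"glDrawRangeElements");
        if (!p_glDrawRangeElements) {
            fprintf(stderr, "glDrawRangeElements is not exported\n");
            return 0;
        }
        return 1;           /* caller may now use p_glDrawRangeElements */
    }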
+
+#ifndef GL_VERSION_1_3
+#define GL_VERSION_1_3 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glActiveTexture (GLenum);
+GLAPI void APIENTRY glClientActiveTexture (GLenum);
+GLAPI void APIENTRY glMultiTexCoord1d (GLenum, GLdouble);
+GLAPI void APIENTRY glMultiTexCoord1dv (GLenum, const GLdouble *);
+GLAPI void APIENTRY glMultiTexCoord1f (GLenum, GLfloat);
+GLAPI void APIENTRY glMultiTexCoord1fv (GLenum, const GLfloat *);
+GLAPI void APIENTRY glMultiTexCoord1i (GLenum, GLint);
+GLAPI void APIENTRY glMultiTexCoord1iv (GLenum, const GLint *);
+GLAPI void APIENTRY glMultiTexCoord1s (GLenum, GLshort);
+GLAPI void APIENTRY glMultiTexCoord1sv (GLenum, const GLshort *);
+GLAPI void APIENTRY glMultiTexCoord2d (GLenum, GLdouble, GLdouble);
+GLAPI void APIENTRY glMultiTexCoord2dv (GLenum, const GLdouble *);
+GLAPI void APIENTRY glMultiTexCoord2f (GLenum, GLfloat, GLfloat);
+GLAPI void APIENTRY glMultiTexCoord2fv (GLenum, const GLfloat *);
+GLAPI void APIENTRY glMultiTexCoord2i (GLenum, GLint, GLint);
+GLAPI void APIENTRY glMultiTexCoord2iv (GLenum, const GLint *);
+GLAPI void APIENTRY glMultiTexCoord2s (GLenum, GLshort, GLshort);
+GLAPI void APIENTRY glMultiTexCoord2sv (GLenum, const GLshort *);
+GLAPI void APIENTRY glMultiTexCoord3d (GLenum, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glMultiTexCoord3dv (GLenum, const GLdouble *);
+GLAPI void APIENTRY glMultiTexCoord3f (GLenum, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glMultiTexCoord3fv (GLenum, const GLfloat *);
+GLAPI void APIENTRY glMultiTexCoord3i (GLenum, GLint, GLint, GLint);
+GLAPI void APIENTRY glMultiTexCoord3iv (GLenum, const GLint *);
+GLAPI void APIENTRY glMultiTexCoord3s (GLenum, GLshort, GLshort, GLshort);
+GLAPI void APIENTRY glMultiTexCoord3sv (GLenum, const GLshort *);
+GLAPI void APIENTRY glMultiTexCoord4d (GLenum, GLdouble, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glMultiTexCoord4dv (GLenum, const GLdouble *);
+GLAPI void APIENTRY glMultiTexCoord4f (GLenum, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glMultiTexCoord4fv (GLenum, const GLfloat *);
+GLAPI void APIENTRY glMultiTexCoord4i (GLenum, GLint, GLint, GLint, GLint);
+GLAPI void APIENTRY glMultiTexCoord4iv (GLenum, const GLint *);
+GLAPI void APIENTRY glMultiTexCoord4s (GLenum, GLshort, GLshort, GLshort, GLshort);
+GLAPI void APIENTRY glMultiTexCoord4sv (GLenum, const GLshort *);
+GLAPI void APIENTRY glLoadTransposeMatrixf (const GLfloat *);
+GLAPI void APIENTRY glLoadTransposeMatrixd (const GLdouble *);
+GLAPI void APIENTRY glMultTransposeMatrixf (const GLfloat *);
+GLAPI void APIENTRY glMultTransposeMatrixd (const GLdouble *);
+GLAPI void APIENTRY glSampleCoverage (GLclampf, GLboolean);
+GLAPI void APIENTRY glCompressedTexImage3D (GLenum, GLint, GLenum, GLsizei, GLsizei, GLsizei, GLint, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glCompressedTexImage2D (GLenum, GLint, GLenum, GLsizei, GLsizei, GLint, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glCompressedTexImage1D (GLenum, GLint, GLenum, GLsizei, GLint, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glCompressedTexSubImage3D (GLenum, GLint, GLint, GLint, GLint, GLsizei, GLsizei, GLsizei, GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glCompressedTexSubImage2D (GLenum, GLint, GLint, GLint, GLsizei, GLsizei, GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glCompressedTexSubImage1D (GLenum, GLint, GLint, GLsizei, GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glGetCompressedTexImage (GLenum, GLint, GLvoid *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLACTIVETEXTUREPROC) (GLenum texture);
+typedef void (APIENTRYP PFNGLCLIENTACTIVETEXTUREPROC) (GLenum texture);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1DPROC) (GLenum target, GLdouble s);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1DVPROC) (GLenum target, const GLdouble *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1FPROC) (GLenum target, GLfloat s);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1FVPROC) (GLenum target, const GLfloat *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1IPROC) (GLenum target, GLint s);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1IVPROC) (GLenum target, const GLint *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1SPROC) (GLenum target, GLshort s);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1SVPROC) (GLenum target, const GLshort *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2DPROC) (GLenum target, GLdouble s, GLdouble t);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2DVPROC) (GLenum target, const GLdouble *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2FPROC) (GLenum target, GLfloat s, GLfloat t);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2FVPROC) (GLenum target, const GLfloat *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2IPROC) (GLenum target, GLint s, GLint t);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2IVPROC) (GLenum target, const GLint *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2SPROC) (GLenum target, GLshort s, GLshort t);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2SVPROC) (GLenum target, const GLshort *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3DPROC) (GLenum target, GLdouble s, GLdouble t, GLdouble r);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3DVPROC) (GLenum target, const GLdouble *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3FPROC) (GLenum target, GLfloat s, GLfloat t, GLfloat r);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3FVPROC) (GLenum target, const GLfloat *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3IPROC) (GLenum target, GLint s, GLint t, GLint r);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3IVPROC) (GLenum target, const GLint *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3SPROC) (GLenum target, GLshort s, GLshort t, GLshort r);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3SVPROC) (GLenum target, const GLshort *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4DPROC) (GLenum target, GLdouble s, GLdouble t, GLdouble r, GLdouble q);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4DVPROC) (GLenum target, const GLdouble *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4FPROC) (GLenum target, GLfloat s, GLfloat t, GLfloat r, GLfloat q);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4FVPROC) (GLenum target, const GLfloat *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4IPROC) (GLenum target, GLint s, GLint t, GLint r, GLint q);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4IVPROC) (GLenum target, const GLint *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4SPROC) (GLenum target, GLshort s, GLshort t, GLshort r, GLshort q);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4SVPROC) (GLenum target, const GLshort *v);
+typedef void (APIENTRYP PFNGLLOADTRANSPOSEMATRIXFPROC) (const GLfloat *m);
+typedef void (APIENTRYP PFNGLLOADTRANSPOSEMATRIXDPROC) (const GLdouble *m);
+typedef void (APIENTRYP PFNGLMULTTRANSPOSEMATRIXFPROC) (const GLfloat *m);
+typedef void (APIENTRYP PFNGLMULTTRANSPOSEMATRIXDPROC) (const GLdouble *m);
+typedef void (APIENTRYP PFNGLSAMPLECOVERAGEPROC) (GLclampf value, GLboolean invert);
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXIMAGE3DPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const GLvoid *data);
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXIMAGE2DPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const GLvoid *data);
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXIMAGE1DPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLint border, GLsizei imageSize, const GLvoid *data);
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const GLvoid *data);
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const GLvoid *data);
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC) (GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const GLvoid *data);
+typedef void (APIENTRYP PFNGLGETCOMPRESSEDTEXIMAGEPROC) (GLenum target, GLint level, GLvoid *img);
+#endif
+
+#ifndef GL_VERSION_1_4
+#define GL_VERSION_1_4 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBlendFuncSeparate (GLenum, GLenum, GLenum, GLenum);
+GLAPI void APIENTRY glFogCoordf (GLfloat);
+GLAPI void APIENTRY glFogCoordfv (const GLfloat *);
+GLAPI void APIENTRY glFogCoordd (GLdouble);
+GLAPI void APIENTRY glFogCoorddv (const GLdouble *);
+GLAPI void APIENTRY glFogCoordPointer (GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glMultiDrawArrays (GLenum, GLint *, GLsizei *, GLsizei);
+GLAPI void APIENTRY glMultiDrawElements (GLenum, const GLsizei *, GLenum, const GLvoid* *, GLsizei);
+GLAPI void APIENTRY glPointParameterf (GLenum, GLfloat);
+GLAPI void APIENTRY glPointParameterfv (GLenum, const GLfloat *);
+GLAPI void APIENTRY glPointParameteri (GLenum, GLint);
+GLAPI void APIENTRY glPointParameteriv (GLenum, const GLint *);
+GLAPI void APIENTRY glSecondaryColor3b (GLbyte, GLbyte, GLbyte);
+GLAPI void APIENTRY glSecondaryColor3bv (const GLbyte *);
+GLAPI void APIENTRY glSecondaryColor3d (GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glSecondaryColor3dv (const GLdouble *);
+GLAPI void APIENTRY glSecondaryColor3f (GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glSecondaryColor3fv (const GLfloat *);
+GLAPI void APIENTRY glSecondaryColor3i (GLint, GLint, GLint);
+GLAPI void APIENTRY glSecondaryColor3iv (const GLint *);
+GLAPI void APIENTRY glSecondaryColor3s (GLshort, GLshort, GLshort);
+GLAPI void APIENTRY glSecondaryColor3sv (const GLshort *);
+GLAPI void APIENTRY glSecondaryColor3ub (GLubyte, GLubyte, GLubyte);
+GLAPI void APIENTRY glSecondaryColor3ubv (const GLubyte *);
+GLAPI void APIENTRY glSecondaryColor3ui (GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glSecondaryColor3uiv (const GLuint *);
+GLAPI void APIENTRY glSecondaryColor3us (GLushort, GLushort, GLushort);
+GLAPI void APIENTRY glSecondaryColor3usv (const GLushort *);
+GLAPI void APIENTRY glSecondaryColorPointer (GLint, GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glWindowPos2d (GLdouble, GLdouble);
+GLAPI void APIENTRY glWindowPos2dv (const GLdouble *);
+GLAPI void APIENTRY glWindowPos2f (GLfloat, GLfloat);
+GLAPI void APIENTRY glWindowPos2fv (const GLfloat *);
+GLAPI void APIENTRY glWindowPos2i (GLint, GLint);
+GLAPI void APIENTRY glWindowPos2iv (const GLint *);
+GLAPI void APIENTRY glWindowPos2s (GLshort, GLshort);
+GLAPI void APIENTRY glWindowPos2sv (const GLshort *);
+GLAPI void APIENTRY glWindowPos3d (GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glWindowPos3dv (const GLdouble *);
+GLAPI void APIENTRY glWindowPos3f (GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glWindowPos3fv (const GLfloat *);
+GLAPI void APIENTRY glWindowPos3i (GLint, GLint, GLint);
+GLAPI void APIENTRY glWindowPos3iv (const GLint *);
+GLAPI void APIENTRY glWindowPos3s (GLshort, GLshort, GLshort);
+GLAPI void APIENTRY glWindowPos3sv (const GLshort *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLBLENDFUNCSEPARATEPROC) (GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorAlpha, GLenum dfactorAlpha);
+typedef void (APIENTRYP PFNGLFOGCOORDFPROC) (GLfloat coord);
+typedef void (APIENTRYP PFNGLFOGCOORDFVPROC) (const GLfloat *coord);
+typedef void (APIENTRYP PFNGLFOGCOORDDPROC) (GLdouble coord);
+typedef void (APIENTRYP PFNGLFOGCOORDDVPROC) (const GLdouble *coord);
+typedef void (APIENTRYP PFNGLFOGCOORDPOINTERPROC) (GLenum type, GLsizei stride, const GLvoid *pointer);
+typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSPROC) (GLenum mode, GLint *first, GLsizei *count, GLsizei primcount);
+typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSPROC) (GLenum mode, const GLsizei *count, GLenum type, const GLvoid* *indices, GLsizei primcount);
+typedef void (APIENTRYP PFNGLPOINTPARAMETERFPROC) (GLenum pname, GLfloat param);
+typedef void (APIENTRYP PFNGLPOINTPARAMETERFVPROC) (GLenum pname, const GLfloat *params);
+typedef void (APIENTRYP PFNGLPOINTPARAMETERIPROC) (GLenum pname, GLint param);
+typedef void (APIENTRYP PFNGLPOINTPARAMETERIVPROC) (GLenum pname, const GLint *params);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3BPROC) (GLbyte red, GLbyte green, GLbyte blue);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3BVPROC) (const GLbyte *v);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3DPROC) (GLdouble red, GLdouble green, GLdouble blue);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3DVPROC) (const GLdouble *v);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3FPROC) (GLfloat red, GLfloat green, GLfloat blue);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3FVPROC) (const GLfloat *v);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3IPROC) (GLint red, GLint green, GLint blue);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3IVPROC) (const GLint *v);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3SPROC) (GLshort red, GLshort green, GLshort blue);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3SVPROC) (const GLshort *v);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3UBPROC) (GLubyte red, GLubyte green, GLubyte blue);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3UBVPROC) (const GLubyte *v);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3UIPROC) (GLuint red, GLuint green, GLuint blue);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3UIVPROC) (const GLuint *v);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3USPROC) (GLushort red, GLushort green, GLushort blue);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3USVPROC) (const GLushort *v);
+typedef void (APIENTRYP PFNGLSECONDARYCOLORPOINTERPROC) (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer);
+typedef void (APIENTRYP PFNGLWINDOWPOS2DPROC) (GLdouble x, GLdouble y);
+typedef void (APIENTRYP PFNGLWINDOWPOS2DVPROC) (const GLdouble *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS2FPROC) (GLfloat x, GLfloat y);
+typedef void (APIENTRYP PFNGLWINDOWPOS2FVPROC) (const GLfloat *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS2IPROC) (GLint x, GLint y);
+typedef void (APIENTRYP PFNGLWINDOWPOS2IVPROC) (const GLint *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS2SPROC) (GLshort x, GLshort y);
+typedef void (APIENTRYP PFNGLWINDOWPOS2SVPROC) (const GLshort *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS3DPROC) (GLdouble x, GLdouble y, GLdouble z);
+typedef void (APIENTRYP PFNGLWINDOWPOS3DVPROC) (const GLdouble *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS3FPROC) (GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLWINDOWPOS3FVPROC) (const GLfloat *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS3IPROC) (GLint x, GLint y, GLint z);
+typedef void (APIENTRYP PFNGLWINDOWPOS3IVPROC) (const GLint *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS3SPROC) (GLshort x, GLshort y, GLshort z);
+typedef void (APIENTRYP PFNGLWINDOWPOS3SVPROC) (const GLshort *v);
+#endif
+
+#ifndef GL_VERSION_1_5
+#define GL_VERSION_1_5 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glGenQueries (GLsizei, GLuint *);
+GLAPI void APIENTRY glDeleteQueries (GLsizei, const GLuint *);
+GLAPI GLboolean APIENTRY glIsQuery (GLuint);
+GLAPI void APIENTRY glBeginQuery (GLenum, GLuint);
+GLAPI void APIENTRY glEndQuery (GLenum);
+GLAPI void APIENTRY glGetQueryiv (GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetQueryObjectiv (GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glGetQueryObjectuiv (GLuint, GLenum, GLuint *);
+GLAPI void APIENTRY glBindBuffer (GLenum, GLuint);
+GLAPI void APIENTRY glDeleteBuffers (GLsizei, const GLuint *);
+GLAPI void APIENTRY glGenBuffers (GLsizei, GLuint *);
+GLAPI GLboolean APIENTRY glIsBuffer (GLuint);
+GLAPI void APIENTRY glBufferData (GLenum, GLsizeiptr, const GLvoid *, GLenum);
+GLAPI void APIENTRY glBufferSubData (GLenum, GLintptr, GLsizeiptr, const GLvoid *);
+GLAPI void APIENTRY glGetBufferSubData (GLenum, GLintptr, GLsizeiptr, GLvoid *);
+GLAPI GLvoid* APIENTRY glMapBuffer (GLenum, GLenum);
+GLAPI GLboolean APIENTRY glUnmapBuffer (GLenum);
+GLAPI void APIENTRY glGetBufferParameteriv (GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetBufferPointerv (GLenum, GLenum, GLvoid* *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLGENQUERIESPROC) (GLsizei n, GLuint *ids);
+typedef void (APIENTRYP PFNGLDELETEQUERIESPROC) (GLsizei n, const GLuint *ids);
+typedef GLboolean (APIENTRYP PFNGLISQUERYPROC) (GLuint id);
+typedef void (APIENTRYP PFNGLBEGINQUERYPROC) (GLenum target, GLuint id);
+typedef void (APIENTRYP PFNGLENDQUERYPROC) (GLenum target);
+typedef void (APIENTRYP PFNGLGETQUERYIVPROC) (GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETQUERYOBJECTIVPROC) (GLuint id, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETQUERYOBJECTUIVPROC) (GLuint id, GLenum pname, GLuint *params);
+typedef void (APIENTRYP PFNGLBINDBUFFERPROC) (GLenum target, GLuint buffer);
+typedef void (APIENTRYP PFNGLDELETEBUFFERSPROC) (GLsizei n, const GLuint *buffers);
+typedef void (APIENTRYP PFNGLGENBUFFERSPROC) (GLsizei n, GLuint *buffers);
+typedef GLboolean (APIENTRYP PFNGLISBUFFERPROC) (GLuint buffer);
+typedef void (APIENTRYP PFNGLBUFFERDATAPROC) (GLenum target, GLsizeiptr size, const GLvoid *data, GLenum usage);
+typedef void (APIENTRYP PFNGLBUFFERSUBDATAPROC) (GLenum target, GLintptr offset, GLsizeiptr size, const GLvoid *data);
+typedef void (APIENTRYP PFNGLGETBUFFERSUBDATAPROC) (GLenum target, GLintptr offset, GLsizeiptr size, GLvoid *data);
+typedef GLvoid* (APIENTRYP PFNGLMAPBUFFERPROC) (GLenum target, GLenum access);
+typedef GLboolean (APIENTRYP PFNGLUNMAPBUFFERPROC) (GLenum target);
+typedef void (APIENTRYP PFNGLGETBUFFERPARAMETERIVPROC) (GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETBUFFERPOINTERVPROC) (GLenum target, GLenum pname, GLvoid* *params);
+#endif
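
Aside: a minimal sketch of driving the GL 1.5 buffer-object entry points through the
typedefs above. The p_* pointers are hypothetical and assumed to have been resolved as
in the earlier sketch; GL_ARRAY_BUFFER and GL_STATIC_DRAW come from the core 1.5 enums
defined earlier in this header:

    static PFNGLGENBUFFERSPROC p_glGenBuffers;
    static PFNGLBINDBUFFERPROC p_glBindBuffer;
    static PFNGLBUFFERDATAPROC p_glBufferData;

    /* Upload a vertex array into a new buffer object and return its name. */
    static GLuint make_vbo(const GLfloat *verts, GLsizeiptr bytes)
    {
        GLuint vbo = 0;
        p_glGenBuffers(1, &vbo);                      /* reserve one buffer name */
        p_glBindBuffer(GL_ARRAY_BUFFER, vbo);         /* bind it as vertex data */
        p_glBufferData(GL_ARRAY_BUFFER, bytes, verts, /* allocate and upload */
                       GL_STATIC_DRAW);
        return vbo;
    }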
+
+#ifndef GL_VERSION_2_0
+#define GL_VERSION_2_0 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBlendEquationSeparate (GLenum, GLenum);
+GLAPI void APIENTRY glDrawBuffers (GLsizei, const GLenum *);
+GLAPI void APIENTRY glStencilOpSeparate (GLenum, GLenum, GLenum, GLenum);
+GLAPI void APIENTRY glStencilFuncSeparate (GLenum, GLenum, GLint, GLuint);
+GLAPI void APIENTRY glStencilMaskSeparate (GLenum, GLuint);
+GLAPI void APIENTRY glAttachShader (GLuint, GLuint);
+GLAPI void APIENTRY glBindAttribLocation (GLuint, GLuint, const GLchar *);
+GLAPI void APIENTRY glCompileShader (GLuint);
+GLAPI GLuint APIENTRY glCreateProgram (void);
+GLAPI GLuint APIENTRY glCreateShader (GLenum);
+GLAPI void APIENTRY glDeleteProgram (GLuint);
+GLAPI void APIENTRY glDeleteShader (GLuint);
+GLAPI void APIENTRY glDetachShader (GLuint, GLuint);
+GLAPI void APIENTRY glDisableVertexAttribArray (GLuint);
+GLAPI void APIENTRY glEnableVertexAttribArray (GLuint);
+GLAPI void APIENTRY glGetActiveAttrib (GLuint, GLuint, GLsizei, GLsizei *, GLint *, GLenum *, GLchar *);
+GLAPI void APIENTRY glGetActiveUniform (GLuint, GLuint, GLsizei, GLsizei *, GLint *, GLenum *, GLchar *);
+GLAPI void APIENTRY glGetAttachedShaders (GLuint, GLsizei, GLsizei *, GLuint *);
+GLAPI GLint APIENTRY glGetAttribLocation (GLuint, const GLchar *);
+GLAPI void APIENTRY glGetProgramiv (GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glGetProgramInfoLog (GLuint, GLsizei, GLsizei *, GLchar *);
+GLAPI void APIENTRY glGetShaderiv (GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glGetShaderInfoLog (GLuint, GLsizei, GLsizei *, GLchar *);
+GLAPI void APIENTRY glGetShaderSource (GLuint, GLsizei, GLsizei *, GLchar *);
+GLAPI GLint APIENTRY glGetUniformLocation (GLuint, const GLchar *);
+GLAPI void APIENTRY glGetUniformfv (GLuint, GLint, GLfloat *);
+GLAPI void APIENTRY glGetUniformiv (GLuint, GLint, GLint *);
+GLAPI void APIENTRY glGetVertexAttribdv (GLuint, GLenum, GLdouble *);
+GLAPI void APIENTRY glGetVertexAttribfv (GLuint, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetVertexAttribiv (GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glGetVertexAttribPointerv (GLuint, GLenum, GLvoid* *);
+GLAPI GLboolean APIENTRY glIsProgram (GLuint);
+GLAPI GLboolean APIENTRY glIsShader (GLuint);
+GLAPI void APIENTRY glLinkProgram (GLuint);
+GLAPI void APIENTRY glShaderSource (GLuint, GLsizei, const GLchar* *, const GLint *);
+GLAPI void APIENTRY glUseProgram (GLuint);
+GLAPI void APIENTRY glUniform1f (GLint, GLfloat);
+GLAPI void APIENTRY glUniform2f (GLint, GLfloat, GLfloat);
+GLAPI void APIENTRY glUniform3f (GLint, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glUniform4f (GLint, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glUniform1i (GLint, GLint);
+GLAPI void APIENTRY glUniform2i (GLint, GLint, GLint);
+GLAPI void APIENTRY glUniform3i (GLint, GLint, GLint, GLint);
+GLAPI void APIENTRY glUniform4i (GLint, GLint, GLint, GLint, GLint);
+GLAPI void APIENTRY glUniform1fv (GLint, GLsizei, const GLfloat *);
+GLAPI void APIENTRY glUniform2fv (GLint, GLsizei, const GLfloat *);
+GLAPI void APIENTRY glUniform3fv (GLint, GLsizei, const GLfloat *);
+GLAPI void APIENTRY glUniform4fv (GLint, GLsizei, const GLfloat *);
+GLAPI void APIENTRY glUniform1iv (GLint, GLsizei, const GLint *);
+GLAPI void APIENTRY glUniform2iv (GLint, GLsizei, const GLint *);
+GLAPI void APIENTRY glUniform3iv (GLint, GLsizei, const GLint *);
+GLAPI void APIENTRY glUniform4iv (GLint, GLsizei, const GLint *);
+GLAPI void APIENTRY glUniformMatrix2fv (GLint, GLsizei, GLboolean, const GLfloat *);
+GLAPI void APIENTRY glUniformMatrix3fv (GLint, GLsizei, GLboolean, const GLfloat *);
+GLAPI void APIENTRY glUniformMatrix4fv (GLint, GLsizei, GLboolean, const GLfloat *);
+GLAPI void APIENTRY glValidateProgram (GLuint);
+GLAPI void APIENTRY glVertexAttrib1d (GLuint, GLdouble);
+GLAPI void APIENTRY glVertexAttrib1dv (GLuint, const GLdouble *);
+GLAPI void APIENTRY glVertexAttrib1f (GLuint, GLfloat);
+GLAPI void APIENTRY glVertexAttrib1fv (GLuint, const GLfloat *);
+GLAPI void APIENTRY glVertexAttrib1s (GLuint, GLshort);
+GLAPI void APIENTRY glVertexAttrib1sv (GLuint, const GLshort *);
+GLAPI void APIENTRY glVertexAttrib2d (GLuint, GLdouble, GLdouble);
+GLAPI void APIENTRY glVertexAttrib2dv (GLuint, const GLdouble *);
+GLAPI void APIENTRY glVertexAttrib2f (GLuint, GLfloat, GLfloat);
+GLAPI void APIENTRY glVertexAttrib2fv (GLuint, const GLfloat *);
+GLAPI void APIENTRY glVertexAttrib2s (GLuint, GLshort, GLshort);
+GLAPI void APIENTRY glVertexAttrib2sv (GLuint, const GLshort *);
+GLAPI void APIENTRY glVertexAttrib3d (GLuint, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glVertexAttrib3dv (GLuint, const GLdouble *);
+GLAPI void APIENTRY glVertexAttrib3f (GLuint, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glVertexAttrib3fv (GLuint, const GLfloat *);
+GLAPI void APIENTRY glVertexAttrib3s (GLuint, GLshort, GLshort, GLshort);
+GLAPI void APIENTRY glVertexAttrib3sv (GLuint, const GLshort *);
+GLAPI void APIENTRY glVertexAttrib4Nbv (GLuint, const GLbyte *);
+GLAPI void APIENTRY glVertexAttrib4Niv (GLuint, const GLint *);
+GLAPI void APIENTRY glVertexAttrib4Nsv (GLuint, const GLshort *);
+GLAPI void APIENTRY glVertexAttrib4Nub (GLuint, GLubyte, GLubyte, GLubyte, GLubyte);
+GLAPI void APIENTRY glVertexAttrib4Nubv (GLuint, const GLubyte *);
+GLAPI void APIENTRY glVertexAttrib4Nuiv (GLuint, const GLuint *);
+GLAPI void APIENTRY glVertexAttrib4Nusv (GLuint, const GLushort *);
+GLAPI void APIENTRY glVertexAttrib4bv (GLuint, const GLbyte *);
+GLAPI void APIENTRY glVertexAttrib4d (GLuint, GLdouble, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glVertexAttrib4dv (GLuint, const GLdouble *);
+GLAPI void APIENTRY glVertexAttrib4f (GLuint, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glVertexAttrib4fv (GLuint, const GLfloat *);
+GLAPI void APIENTRY glVertexAttrib4iv (GLuint, const GLint *);
+GLAPI void APIENTRY glVertexAttrib4s (GLuint, GLshort, GLshort, GLshort, GLshort);
+GLAPI void APIENTRY glVertexAttrib4sv (GLuint, const GLshort *);
+GLAPI void APIENTRY glVertexAttrib4ubv (GLuint, const GLubyte *);
+GLAPI void APIENTRY glVertexAttrib4uiv (GLuint, const GLuint *);
+GLAPI void APIENTRY glVertexAttrib4usv (GLuint, const GLushort *);
+GLAPI void APIENTRY glVertexAttribPointer (GLuint, GLint, GLenum, GLboolean, GLsizei, const GLvoid *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLBLENDEQUATIONSEPARATEPROC) (GLenum modeRGB, GLenum modeAlpha);
+typedef void (APIENTRYP PFNGLDRAWBUFFERSPROC) (GLsizei n, const GLenum *bufs);
+typedef void (APIENTRYP PFNGLSTENCILOPSEPARATEPROC) (GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass);
+typedef void (APIENTRYP PFNGLSTENCILFUNCSEPARATEPROC) (GLenum frontfunc, GLenum backfunc, GLint ref, GLuint mask);
+typedef void (APIENTRYP PFNGLSTENCILMASKSEPARATEPROC) (GLenum face, GLuint mask);
+typedef void (APIENTRYP PFNGLATTACHSHADERPROC) (GLuint program, GLuint shader);
+typedef void (APIENTRYP PFNGLBINDATTRIBLOCATIONPROC) (GLuint program, GLuint index, const GLchar *name);
+typedef void (APIENTRYP PFNGLCOMPILESHADERPROC) (GLuint shader);
+typedef GLuint (APIENTRYP PFNGLCREATEPROGRAMPROC) (void);
+typedef GLuint (APIENTRYP PFNGLCREATESHADERPROC) (GLenum type);
+typedef void (APIENTRYP PFNGLDELETEPROGRAMPROC) (GLuint program);
+typedef void (APIENTRYP PFNGLDELETESHADERPROC) (GLuint shader);
+typedef void (APIENTRYP PFNGLDETACHSHADERPROC) (GLuint program, GLuint shader);
+typedef void (APIENTRYP PFNGLDISABLEVERTEXATTRIBARRAYPROC) (GLuint index);
+typedef void (APIENTRYP PFNGLENABLEVERTEXATTRIBARRAYPROC) (GLuint index);
+typedef void (APIENTRYP PFNGLGETACTIVEATTRIBPROC) (GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLint *size, GLenum *type, GLchar *name);
+typedef void (APIENTRYP PFNGLGETACTIVEUNIFORMPROC) (GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLint *size, GLenum *type, GLchar *name);
+typedef void (APIENTRYP PFNGLGETATTACHEDSHADERSPROC) (GLuint program, GLsizei maxcount, GLsizei *count, GLuint *obj);
+typedef GLint (APIENTRYP PFNGLGETATTRIBLOCATIONPROC) (GLuint program, const GLchar *name);
+typedef void (APIENTRYP PFNGLGETPROGRAMIVPROC) (GLuint program, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETPROGRAMINFOLOGPROC) (GLuint program, GLsizei bufSize, GLsizei *length, GLchar *infoLog);
+typedef void (APIENTRYP PFNGLGETSHADERIVPROC) (GLuint shader, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETSHADERINFOLOGPROC) (GLuint shader, GLsizei bufSize, GLsizei *length, GLchar *infoLog);
+typedef void (APIENTRYP PFNGLGETSHADERSOURCEPROC) (GLuint shader, GLsizei bufSize, GLsizei *length, GLchar *source);
+typedef GLint (APIENTRYP PFNGLGETUNIFORMLOCATIONPROC) (GLuint program, const GLchar *name);
+typedef void (APIENTRYP PFNGLGETUNIFORMFVPROC) (GLuint program, GLint location, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETUNIFORMIVPROC) (GLuint program, GLint location, GLint *params);
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBDVPROC) (GLuint index, GLenum pname, GLdouble *params);
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBFVPROC) (GLuint index, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBIVPROC) (GLuint index, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBPOINTERVPROC) (GLuint index, GLenum pname, GLvoid* *pointer);
+typedef GLboolean (APIENTRYP PFNGLISPROGRAMPROC) (GLuint program);
+typedef GLboolean (APIENTRYP PFNGLISSHADERPROC) (GLuint shader);
+typedef void (APIENTRYP PFNGLLINKPROGRAMPROC) (GLuint program);
+typedef void (APIENTRYP PFNGLSHADERSOURCEPROC) (GLuint shader, GLsizei count, const GLchar* *string, const GLint *length);
+typedef void (APIENTRYP PFNGLUSEPROGRAMPROC) (GLuint program);
+typedef void (APIENTRYP PFNGLUNIFORM1FPROC) (GLint location, GLfloat v0);
+typedef void (APIENTRYP PFNGLUNIFORM2FPROC) (GLint location, GLfloat v0, GLfloat v1);
+typedef void (APIENTRYP PFNGLUNIFORM3FPROC) (GLint location, GLfloat v0, GLfloat v1, GLfloat v2);
+typedef void (APIENTRYP PFNGLUNIFORM4FPROC) (GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3);
+typedef void (APIENTRYP PFNGLUNIFORM1IPROC) (GLint location, GLint v0);
+typedef void (APIENTRYP PFNGLUNIFORM2IPROC) (GLint location, GLint v0, GLint v1);
+typedef void (APIENTRYP PFNGLUNIFORM3IPROC) (GLint location, GLint v0, GLint v1, GLint v2);
+typedef void (APIENTRYP PFNGLUNIFORM4IPROC) (GLint location, GLint v0, GLint v1, GLint v2, GLint v3);
+typedef void (APIENTRYP PFNGLUNIFORM1FVPROC) (GLint location, GLsizei count, const GLfloat *value);
+typedef void (APIENTRYP PFNGLUNIFORM2FVPROC) (GLint location, GLsizei count, const GLfloat *value);
+typedef void (APIENTRYP PFNGLUNIFORM3FVPROC) (GLint location, GLsizei count, const GLfloat *value);
+typedef void (APIENTRYP PFNGLUNIFORM4FVPROC) (GLint location, GLsizei count, const GLfloat *value);
+typedef void (APIENTRYP PFNGLUNIFORM1IVPROC) (GLint location, GLsizei count, const GLint *value);
+typedef void (APIENTRYP PFNGLUNIFORM2IVPROC) (GLint location, GLsizei count, const GLint *value);
+typedef void (APIENTRYP PFNGLUNIFORM3IVPROC) (GLint location, GLsizei count, const GLint *value);
+typedef void (APIENTRYP PFNGLUNIFORM4IVPROC) (GLint location, GLsizei count, const GLint *value);
+typedef void (APIENTRYP PFNGLUNIFORMMATRIX2FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+typedef void (APIENTRYP PFNGLUNIFORMMATRIX3FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+typedef void (APIENTRYP PFNGLUNIFORMMATRIX4FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+typedef void (APIENTRYP PFNGLVALIDATEPROGRAMPROC) (GLuint program);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1DPROC) (GLuint index, GLdouble x);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1DVPROC) (GLuint index, const GLdouble *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1FPROC) (GLuint index, GLfloat x);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1FVPROC) (GLuint index, const GLfloat *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1SPROC) (GLuint index, GLshort x);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1SVPROC) (GLuint index, const GLshort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2DPROC) (GLuint index, GLdouble x, GLdouble y);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2DVPROC) (GLuint index, const GLdouble *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2FPROC) (GLuint index, GLfloat x, GLfloat y);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2FVPROC) (GLuint index, const GLfloat *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2SPROC) (GLuint index, GLshort x, GLshort y);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2SVPROC) (GLuint index, const GLshort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3DPROC) (GLuint index, GLdouble x, GLdouble y, GLdouble z);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3DVPROC) (GLuint index, const GLdouble *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3FPROC) (GLuint index, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3FVPROC) (GLuint index, const GLfloat *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3SPROC) (GLuint index, GLshort x, GLshort y, GLshort z);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3SVPROC) (GLuint index, const GLshort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4NBVPROC) (GLuint index, const GLbyte *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4NIVPROC) (GLuint index, const GLint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4NSVPROC) (GLuint index, const GLshort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4NUBPROC) (GLuint index, GLubyte x, GLubyte y, GLubyte z, GLubyte w);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4NUBVPROC) (GLuint index, const GLubyte *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4NUIVPROC) (GLuint index, const GLuint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4NUSVPROC) (GLuint index, const GLushort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4BVPROC) (GLuint index, const GLbyte *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4DPROC) (GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4DVPROC) (GLuint index, const GLdouble *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4FPROC) (GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4FVPROC) (GLuint index, const GLfloat *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4IVPROC) (GLuint index, const GLint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4SPROC) (GLuint index, GLshort x, GLshort y, GLshort z, GLshort w);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4SVPROC) (GLuint index, const GLshort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4UBVPROC) (GLuint index, const GLubyte *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4UIVPROC) (GLuint index, const GLuint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4USVPROC) (GLuint index, const GLushort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBPOINTERPROC) (GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const GLvoid *pointer);
+#endif
+
+#ifndef GL_VERSION_2_1
+#define GL_VERSION_2_1 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glUniformMatrix2x3fv (GLint, GLsizei, GLboolean, const GLfloat *);
+GLAPI void APIENTRY glUniformMatrix3x2fv (GLint, GLsizei, GLboolean, const GLfloat *);
+GLAPI void APIENTRY glUniformMatrix2x4fv (GLint, GLsizei, GLboolean, const GLfloat *);
+GLAPI void APIENTRY glUniformMatrix4x2fv (GLint, GLsizei, GLboolean, const GLfloat *);
+GLAPI void APIENTRY glUniformMatrix3x4fv (GLint, GLsizei, GLboolean, const GLfloat *);
+GLAPI void APIENTRY glUniformMatrix4x3fv (GLint, GLsizei, GLboolean, const GLfloat *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLUNIFORMMATRIX2X3FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+typedef void (APIENTRYP PFNGLUNIFORMMATRIX3X2FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+typedef void (APIENTRYP PFNGLUNIFORMMATRIX2X4FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+typedef void (APIENTRYP PFNGLUNIFORMMATRIX4X2FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+typedef void (APIENTRYP PFNGLUNIFORMMATRIX3X4FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+typedef void (APIENTRYP PFNGLUNIFORMMATRIX4X3FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+#endif
+
+#ifndef GL_VERSION_3_0
+#define GL_VERSION_3_0 1
+/* OpenGL 3.0 also reuses entry points from these extensions: */
+/* ARB_framebuffer_object */
+/* ARB_map_buffer_range */
+/* ARB_vertex_array_object */
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glColorMaski (GLuint, GLboolean, GLboolean, GLboolean, GLboolean);
+GLAPI void APIENTRY glGetBooleani_v (GLenum, GLuint, GLboolean *);
+GLAPI void APIENTRY glGetIntegeri_v (GLenum, GLuint, GLint *);
+GLAPI void APIENTRY glEnablei (GLenum, GLuint);
+GLAPI void APIENTRY glDisablei (GLenum, GLuint);
+GLAPI GLboolean APIENTRY glIsEnabledi (GLenum, GLuint);
+GLAPI void APIENTRY glBeginTransformFeedback (GLenum);
+GLAPI void APIENTRY glEndTransformFeedback (void);
+GLAPI void APIENTRY glBindBufferRange (GLenum, GLuint, GLuint, GLintptr, GLsizeiptr);
+GLAPI void APIENTRY glBindBufferBase (GLenum, GLuint, GLuint);
+GLAPI void APIENTRY glTransformFeedbackVaryings (GLuint, GLsizei, const GLchar* *, GLenum);
+GLAPI void APIENTRY glGetTransformFeedbackVarying (GLuint, GLuint, GLsizei, GLsizei *, GLsizei *, GLenum *, GLchar *);
+GLAPI void APIENTRY glClampColor (GLenum, GLenum);
+GLAPI void APIENTRY glBeginConditionalRender (GLuint, GLenum);
+GLAPI void APIENTRY glEndConditionalRender (void);
+GLAPI void APIENTRY glVertexAttribI1i (GLuint, GLint);
+GLAPI void APIENTRY glVertexAttribI2i (GLuint, GLint, GLint);
+GLAPI void APIENTRY glVertexAttribI3i (GLuint, GLint, GLint, GLint);
+GLAPI void APIENTRY glVertexAttribI4i (GLuint, GLint, GLint, GLint, GLint);
+GLAPI void APIENTRY glVertexAttribI1ui (GLuint, GLuint);
+GLAPI void APIENTRY glVertexAttribI2ui (GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glVertexAttribI3ui (GLuint, GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glVertexAttribI4ui (GLuint, GLuint, GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glVertexAttribI1iv (GLuint, const GLint *);
+GLAPI void APIENTRY glVertexAttribI2iv (GLuint, const GLint *);
+GLAPI void APIENTRY glVertexAttribI3iv (GLuint, const GLint *);
+GLAPI void APIENTRY glVertexAttribI4iv (GLuint, const GLint *);
+GLAPI void APIENTRY glVertexAttribI1uiv (GLuint, const GLuint *);
+GLAPI void APIENTRY glVertexAttribI2uiv (GLuint, const GLuint *);
+GLAPI void APIENTRY glVertexAttribI3uiv (GLuint, const GLuint *);
+GLAPI void APIENTRY glVertexAttribI4uiv (GLuint, const GLuint *);
+GLAPI void APIENTRY glVertexAttribI4bv (GLuint, const GLbyte *);
+GLAPI void APIENTRY glVertexAttribI4sv (GLuint, const GLshort *);
+GLAPI void APIENTRY glVertexAttribI4ubv (GLuint, const GLubyte *);
+GLAPI void APIENTRY glVertexAttribI4usv (GLuint, const GLushort *);
+GLAPI void APIENTRY glVertexAttribIPointer (GLuint, GLint, GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glGetVertexAttribIiv (GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glGetVertexAttribIuiv (GLuint, GLenum, GLuint *);
+GLAPI void APIENTRY glGetUniformuiv (GLuint, GLint, GLuint *);
+GLAPI void APIENTRY glBindFragDataLocation (GLuint, GLuint, const GLchar *);
+GLAPI GLint APIENTRY glGetFragDataLocation (GLuint, const GLchar *);
+GLAPI void APIENTRY glUniform1ui (GLint, GLuint);
+GLAPI void APIENTRY glUniform2ui (GLint, GLuint, GLuint);
+GLAPI void APIENTRY glUniform3ui (GLint, GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glUniform4ui (GLint, GLuint, GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glUniform1uiv (GLint, GLsizei, const GLuint *);
+GLAPI void APIENTRY glUniform2uiv (GLint, GLsizei, const GLuint *);
+GLAPI void APIENTRY glUniform3uiv (GLint, GLsizei, const GLuint *);
+GLAPI void APIENTRY glUniform4uiv (GLint, GLsizei, const GLuint *);
+GLAPI void APIENTRY glTexParameterIiv (GLenum, GLenum, const GLint *);
+GLAPI void APIENTRY glTexParameterIuiv (GLenum, GLenum, const GLuint *);
+GLAPI void APIENTRY glGetTexParameterIiv (GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetTexParameterIuiv (GLenum, GLenum, GLuint *);
+GLAPI void APIENTRY glClearBufferiv (GLenum, GLint, const GLint *);
+GLAPI void APIENTRY glClearBufferuiv (GLenum, GLint, const GLuint *);
+GLAPI void APIENTRY glClearBufferfv (GLenum, GLint, const GLfloat *);
+GLAPI void APIENTRY glClearBufferfi (GLenum, GLint, GLfloat, GLint);
+GLAPI const GLubyte * APIENTRY glGetStringi (GLenum, GLuint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLCOLORMASKIPROC) (GLuint index, GLboolean r, GLboolean g, GLboolean b, GLboolean a);
+typedef void (APIENTRYP PFNGLGETBOOLEANI_VPROC) (GLenum target, GLuint index, GLboolean *data);
+typedef void (APIENTRYP PFNGLGETINTEGERI_VPROC) (GLenum target, GLuint index, GLint *data);
+typedef void (APIENTRYP PFNGLENABLEIPROC) (GLenum target, GLuint index);
+typedef void (APIENTRYP PFNGLDISABLEIPROC) (GLenum target, GLuint index);
+typedef GLboolean (APIENTRYP PFNGLISENABLEDIPROC) (GLenum target, GLuint index);
+typedef void (APIENTRYP PFNGLBEGINTRANSFORMFEEDBACKPROC) (GLenum primitiveMode);
+typedef void (APIENTRYP PFNGLENDTRANSFORMFEEDBACKPROC) (void);
+typedef void (APIENTRYP PFNGLBINDBUFFERRANGEPROC) (GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size);
+typedef void (APIENTRYP PFNGLBINDBUFFERBASEPROC) (GLenum target, GLuint index, GLuint buffer);
+typedef void (APIENTRYP PFNGLTRANSFORMFEEDBACKVARYINGSPROC) (GLuint program, GLsizei count, const GLchar* *varyings, GLenum bufferMode);
+typedef void (APIENTRYP PFNGLGETTRANSFORMFEEDBACKVARYINGPROC) (GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLsizei *size, GLenum *type, GLchar *name);
+typedef void (APIENTRYP PFNGLCLAMPCOLORPROC) (GLenum target, GLenum clamp);
+typedef void (APIENTRYP PFNGLBEGINCONDITIONALRENDERPROC) (GLuint id, GLenum mode);
+typedef void (APIENTRYP PFNGLENDCONDITIONALRENDERPROC) (void);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI1IPROC) (GLuint index, GLint x);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI2IPROC) (GLuint index, GLint x, GLint y);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI3IPROC) (GLuint index, GLint x, GLint y, GLint z);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4IPROC) (GLuint index, GLint x, GLint y, GLint z, GLint w);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI1UIPROC) (GLuint index, GLuint x);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI2UIPROC) (GLuint index, GLuint x, GLuint y);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI3UIPROC) (GLuint index, GLuint x, GLuint y, GLuint z);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4UIPROC) (GLuint index, GLuint x, GLuint y, GLuint z, GLuint w);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI1IVPROC) (GLuint index, const GLint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI2IVPROC) (GLuint index, const GLint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI3IVPROC) (GLuint index, const GLint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4IVPROC) (GLuint index, const GLint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI1UIVPROC) (GLuint index, const GLuint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI2UIVPROC) (GLuint index, const GLuint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI3UIVPROC) (GLuint index, const GLuint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4UIVPROC) (GLuint index, const GLuint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4BVPROC) (GLuint index, const GLbyte *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4SVPROC) (GLuint index, const GLshort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4UBVPROC) (GLuint index, const GLubyte *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4USVPROC) (GLuint index, const GLushort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBIPOINTERPROC) (GLuint index, GLint size, GLenum type, GLsizei stride, const GLvoid *pointer);
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBIIVPROC) (GLuint index, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBIUIVPROC) (GLuint index, GLenum pname, GLuint *params);
+typedef void (APIENTRYP PFNGLGETUNIFORMUIVPROC) (GLuint program, GLint location, GLuint *params);
+typedef void (APIENTRYP PFNGLBINDFRAGDATALOCATIONPROC) (GLuint program, GLuint color, const GLchar *name);
+typedef GLint (APIENTRYP PFNGLGETFRAGDATALOCATIONPROC) (GLuint program, const GLchar *name);
+typedef void (APIENTRYP PFNGLUNIFORM1UIPROC) (GLint location, GLuint v0);
+typedef void (APIENTRYP PFNGLUNIFORM2UIPROC) (GLint location, GLuint v0, GLuint v1);
+typedef void (APIENTRYP PFNGLUNIFORM3UIPROC) (GLint location, GLuint v0, GLuint v1, GLuint v2);
+typedef void (APIENTRYP PFNGLUNIFORM4UIPROC) (GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+typedef void (APIENTRYP PFNGLUNIFORM1UIVPROC) (GLint location, GLsizei count, const GLuint *value);
+typedef void (APIENTRYP PFNGLUNIFORM2UIVPROC) (GLint location, GLsizei count, const GLuint *value);
+typedef void (APIENTRYP PFNGLUNIFORM3UIVPROC) (GLint location, GLsizei count, const GLuint *value);
+typedef void (APIENTRYP PFNGLUNIFORM4UIVPROC) (GLint location, GLsizei count, const GLuint *value);
+typedef void (APIENTRYP PFNGLTEXPARAMETERIIVPROC) (GLenum target, GLenum pname, const GLint *params);
+typedef void (APIENTRYP PFNGLTEXPARAMETERIUIVPROC) (GLenum target, GLenum pname, const GLuint *params);
+typedef void (APIENTRYP PFNGLGETTEXPARAMETERIIVPROC) (GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETTEXPARAMETERIUIVPROC) (GLenum target, GLenum pname, GLuint *params);
+typedef void (APIENTRYP PFNGLCLEARBUFFERIVPROC) (GLenum buffer, GLint drawbuffer, const GLint *value);
+typedef void (APIENTRYP PFNGLCLEARBUFFERUIVPROC) (GLenum buffer, GLint drawbuffer, const GLuint *value);
+typedef void (APIENTRYP PFNGLCLEARBUFFERFVPROC) (GLenum buffer, GLint drawbuffer, const GLfloat *value);
+typedef void (APIENTRYP PFNGLCLEARBUFFERFIPROC) (GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil);
+typedef const GLubyte * (APIENTRYP PFNGLGETSTRINGIPROC) (GLenum name, GLuint index);
+#endif
+
+#ifndef GL_ARB_multitexture
+#define GL_ARB_multitexture 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glActiveTextureARB (GLenum);
+GLAPI void APIENTRY glClientActiveTextureARB (GLenum);
+GLAPI void APIENTRY glMultiTexCoord1dARB (GLenum, GLdouble);
+GLAPI void APIENTRY glMultiTexCoord1dvARB (GLenum, const GLdouble *);
+GLAPI void APIENTRY glMultiTexCoord1fARB (GLenum, GLfloat);
+GLAPI void APIENTRY glMultiTexCoord1fvARB (GLenum, const GLfloat *);
+GLAPI void APIENTRY glMultiTexCoord1iARB (GLenum, GLint);
+GLAPI void APIENTRY glMultiTexCoord1ivARB (GLenum, const GLint *);
+GLAPI void APIENTRY glMultiTexCoord1sARB (GLenum, GLshort);
+GLAPI void APIENTRY glMultiTexCoord1svARB (GLenum, const GLshort *);
+GLAPI void APIENTRY glMultiTexCoord2dARB (GLenum, GLdouble, GLdouble);
+GLAPI void APIENTRY glMultiTexCoord2dvARB (GLenum, const GLdouble *);
+GLAPI void APIENTRY glMultiTexCoord2fARB (GLenum, GLfloat, GLfloat);
+GLAPI void APIENTRY glMultiTexCoord2fvARB (GLenum, const GLfloat *);
+GLAPI void APIENTRY glMultiTexCoord2iARB (GLenum, GLint, GLint);
+GLAPI void APIENTRY glMultiTexCoord2ivARB (GLenum, const GLint *);
+GLAPI void APIENTRY glMultiTexCoord2sARB (GLenum, GLshort, GLshort);
+GLAPI void APIENTRY glMultiTexCoord2svARB (GLenum, const GLshort *);
+GLAPI void APIENTRY glMultiTexCoord3dARB (GLenum, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glMultiTexCoord3dvARB (GLenum, const GLdouble *);
+GLAPI void APIENTRY glMultiTexCoord3fARB (GLenum, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glMultiTexCoord3fvARB (GLenum, const GLfloat *);
+GLAPI void APIENTRY glMultiTexCoord3iARB (GLenum, GLint, GLint, GLint);
+GLAPI void APIENTRY glMultiTexCoord3ivARB (GLenum, const GLint *);
+GLAPI void APIENTRY glMultiTexCoord3sARB (GLenum, GLshort, GLshort, GLshort);
+GLAPI void APIENTRY glMultiTexCoord3svARB (GLenum, const GLshort *);
+GLAPI void APIENTRY glMultiTexCoord4dARB (GLenum, GLdouble, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glMultiTexCoord4dvARB (GLenum, const GLdouble *);
+GLAPI void APIENTRY glMultiTexCoord4fARB (GLenum, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glMultiTexCoord4fvARB (GLenum, const GLfloat *);
+GLAPI void APIENTRY glMultiTexCoord4iARB (GLenum, GLint, GLint, GLint, GLint);
+GLAPI void APIENTRY glMultiTexCoord4ivARB (GLenum, const GLint *);
+GLAPI void APIENTRY glMultiTexCoord4sARB (GLenum, GLshort, GLshort, GLshort, GLshort);
+GLAPI void APIENTRY glMultiTexCoord4svARB (GLenum, const GLshort *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLACTIVETEXTUREARBPROC) (GLenum texture);
+typedef void (APIENTRYP PFNGLCLIENTACTIVETEXTUREARBPROC) (GLenum texture);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1DARBPROC) (GLenum target, GLdouble s);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1DVARBPROC) (GLenum target, const GLdouble *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1FARBPROC) (GLenum target, GLfloat s);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1FVARBPROC) (GLenum target, const GLfloat *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1IARBPROC) (GLenum target, GLint s);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1IVARBPROC) (GLenum target, const GLint *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1SARBPROC) (GLenum target, GLshort s);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1SVARBPROC) (GLenum target, const GLshort *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2DARBPROC) (GLenum target, GLdouble s, GLdouble t);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2DVARBPROC) (GLenum target, const GLdouble *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2FARBPROC) (GLenum target, GLfloat s, GLfloat t);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2FVARBPROC) (GLenum target, const GLfloat *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2IARBPROC) (GLenum target, GLint s, GLint t);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2IVARBPROC) (GLenum target, const GLint *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2SARBPROC) (GLenum target, GLshort s, GLshort t);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2SVARBPROC) (GLenum target, const GLshort *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3DARBPROC) (GLenum target, GLdouble s, GLdouble t, GLdouble r);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3DVARBPROC) (GLenum target, const GLdouble *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3FARBPROC) (GLenum target, GLfloat s, GLfloat t, GLfloat r);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3FVARBPROC) (GLenum target, const GLfloat *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3IARBPROC) (GLenum target, GLint s, GLint t, GLint r);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3IVARBPROC) (GLenum target, const GLint *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3SARBPROC) (GLenum target, GLshort s, GLshort t, GLshort r);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3SVARBPROC) (GLenum target, const GLshort *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4DARBPROC) (GLenum target, GLdouble s, GLdouble t, GLdouble r, GLdouble q);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4DVARBPROC) (GLenum target, const GLdouble *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4FARBPROC) (GLenum target, GLfloat s, GLfloat t, GLfloat r, GLfloat q);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4FVARBPROC) (GLenum target, const GLfloat *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4IARBPROC) (GLenum target, GLint s, GLint t, GLint r, GLint q);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4IVARBPROC) (GLenum target, const GLint *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4SARBPROC) (GLenum target, GLshort s, GLshort t, GLshort r, GLshort q);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4SVARBPROC) (GLenum target, const GLshort *v);
+#endif
+
+#ifndef GL_ARB_transpose_matrix
+#define GL_ARB_transpose_matrix 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glLoadTransposeMatrixfARB (const GLfloat *);
+GLAPI void APIENTRY glLoadTransposeMatrixdARB (const GLdouble *);
+GLAPI void APIENTRY glMultTransposeMatrixfARB (const GLfloat *);
+GLAPI void APIENTRY glMultTransposeMatrixdARB (const GLdouble *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLLOADTRANSPOSEMATRIXFARBPROC) (const GLfloat *m);
+typedef void (APIENTRYP PFNGLLOADTRANSPOSEMATRIXDARBPROC) (const GLdouble *m);
+typedef void (APIENTRYP PFNGLMULTTRANSPOSEMATRIXFARBPROC) (const GLfloat *m);
+typedef void (APIENTRYP PFNGLMULTTRANSPOSEMATRIXDARBPROC) (const GLdouble *m);
+#endif
+
+#ifndef GL_ARB_multisample
+#define GL_ARB_multisample 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glSampleCoverageARB (GLclampf, GLboolean);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLSAMPLECOVERAGEARBPROC) (GLclampf value, GLboolean invert);
+#endif
+
+#ifndef GL_ARB_texture_env_add
+#define GL_ARB_texture_env_add 1
+#endif
+
+#ifndef GL_ARB_texture_cube_map
+#define GL_ARB_texture_cube_map 1
+#endif
+
+#ifndef GL_ARB_texture_compression
+#define GL_ARB_texture_compression 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glCompressedTexImage3DARB (GLenum, GLint, GLenum, GLsizei, GLsizei, GLsizei, GLint, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glCompressedTexImage2DARB (GLenum, GLint, GLenum, GLsizei, GLsizei, GLint, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glCompressedTexImage1DARB (GLenum, GLint, GLenum, GLsizei, GLint, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glCompressedTexSubImage3DARB (GLenum, GLint, GLint, GLint, GLint, GLsizei, GLsizei, GLsizei, GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glCompressedTexSubImage2DARB (GLenum, GLint, GLint, GLint, GLsizei, GLsizei, GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glCompressedTexSubImage1DARB (GLenum, GLint, GLint, GLsizei, GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glGetCompressedTexImageARB (GLenum, GLint, GLvoid *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXIMAGE3DARBPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const GLvoid *data);
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXIMAGE2DARBPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const GLvoid *data);
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXIMAGE1DARBPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLint border, GLsizei imageSize, const GLvoid *data);
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE3DARBPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const GLvoid *data);
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE2DARBPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const GLvoid *data);
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE1DARBPROC) (GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const GLvoid *data);
+typedef void (APIENTRYP PFNGLGETCOMPRESSEDTEXIMAGEARBPROC) (GLenum target, GLint level, GLvoid *img);
+#endif
+
+#ifndef GL_ARB_texture_border_clamp
+#define GL_ARB_texture_border_clamp 1
+#endif
+
+#ifndef GL_ARB_point_parameters
+#define GL_ARB_point_parameters 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glPointParameterfARB (GLenum, GLfloat);
+GLAPI void APIENTRY glPointParameterfvARB (GLenum, const GLfloat *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLPOINTPARAMETERFARBPROC) (GLenum pname, GLfloat param);
+typedef void (APIENTRYP PFNGLPOINTPARAMETERFVARBPROC) (GLenum pname, const GLfloat *params);
+#endif
+
+#ifndef GL_ARB_vertex_blend
+#define GL_ARB_vertex_blend 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glWeightbvARB (GLint, const GLbyte *);
+GLAPI void APIENTRY glWeightsvARB (GLint, const GLshort *);
+GLAPI void APIENTRY glWeightivARB (GLint, const GLint *);
+GLAPI void APIENTRY glWeightfvARB (GLint, const GLfloat *);
+GLAPI void APIENTRY glWeightdvARB (GLint, const GLdouble *);
+GLAPI void APIENTRY glWeightubvARB (GLint, const GLubyte *);
+GLAPI void APIENTRY glWeightusvARB (GLint, const GLushort *);
+GLAPI void APIENTRY glWeightuivARB (GLint, const GLuint *);
+GLAPI void APIENTRY glWeightPointerARB (GLint, GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glVertexBlendARB (GLint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLWEIGHTBVARBPROC) (GLint size, const GLbyte *weights);
+typedef void (APIENTRYP PFNGLWEIGHTSVARBPROC) (GLint size, const GLshort *weights);
+typedef void (APIENTRYP PFNGLWEIGHTIVARBPROC) (GLint size, const GLint *weights);
+typedef void (APIENTRYP PFNGLWEIGHTFVARBPROC) (GLint size, const GLfloat *weights);
+typedef void (APIENTRYP PFNGLWEIGHTDVARBPROC) (GLint size, const GLdouble *weights);
+typedef void (APIENTRYP PFNGLWEIGHTUBVARBPROC) (GLint size, const GLubyte *weights);
+typedef void (APIENTRYP PFNGLWEIGHTUSVARBPROC) (GLint size, const GLushort *weights);
+typedef void (APIENTRYP PFNGLWEIGHTUIVARBPROC) (GLint size, const GLuint *weights);
+typedef void (APIENTRYP PFNGLWEIGHTPOINTERARBPROC) (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer);
+typedef void (APIENTRYP PFNGLVERTEXBLENDARBPROC) (GLint count);
+#endif
+
+#ifndef GL_ARB_matrix_palette
+#define GL_ARB_matrix_palette 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glCurrentPaletteMatrixARB (GLint);
+GLAPI void APIENTRY glMatrixIndexubvARB (GLint, const GLubyte *);
+GLAPI void APIENTRY glMatrixIndexusvARB (GLint, const GLushort *);
+GLAPI void APIENTRY glMatrixIndexuivARB (GLint, const GLuint *);
+GLAPI void APIENTRY glMatrixIndexPointerARB (GLint, GLenum, GLsizei, const GLvoid *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLCURRENTPALETTEMATRIXARBPROC) (GLint index);
+typedef void (APIENTRYP PFNGLMATRIXINDEXUBVARBPROC) (GLint size, const GLubyte *indices);
+typedef void (APIENTRYP PFNGLMATRIXINDEXUSVARBPROC) (GLint size, const GLushort *indices);
+typedef void (APIENTRYP PFNGLMATRIXINDEXUIVARBPROC) (GLint size, const GLuint *indices);
+typedef void (APIENTRYP PFNGLMATRIXINDEXPOINTERARBPROC) (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer);
+#endif
+
+#ifndef GL_ARB_texture_env_combine
+#define GL_ARB_texture_env_combine 1
+#endif
+
+#ifndef GL_ARB_texture_env_crossbar
+#define GL_ARB_texture_env_crossbar 1
+#endif
+
+#ifndef GL_ARB_texture_env_dot3
+#define GL_ARB_texture_env_dot3 1
+#endif
+
+#ifndef GL_ARB_texture_mirrored_repeat
+#define GL_ARB_texture_mirrored_repeat 1
+#endif
+
+#ifndef GL_ARB_depth_texture
+#define GL_ARB_depth_texture 1
+#endif
+
+#ifndef GL_ARB_shadow
+#define GL_ARB_shadow 1
+#endif
+
+#ifndef GL_ARB_shadow_ambient
+#define GL_ARB_shadow_ambient 1
+#endif
+
+#ifndef GL_ARB_window_pos
+#define GL_ARB_window_pos 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glWindowPos2dARB (GLdouble, GLdouble);
+GLAPI void APIENTRY glWindowPos2dvARB (const GLdouble *);
+GLAPI void APIENTRY glWindowPos2fARB (GLfloat, GLfloat);
+GLAPI void APIENTRY glWindowPos2fvARB (const GLfloat *);
+GLAPI void APIENTRY glWindowPos2iARB (GLint, GLint);
+GLAPI void APIENTRY glWindowPos2ivARB (const GLint *);
+GLAPI void APIENTRY glWindowPos2sARB (GLshort, GLshort);
+GLAPI void APIENTRY glWindowPos2svARB (const GLshort *);
+GLAPI void APIENTRY glWindowPos3dARB (GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glWindowPos3dvARB (const GLdouble *);
+GLAPI void APIENTRY glWindowPos3fARB (GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glWindowPos3fvARB (const GLfloat *);
+GLAPI void APIENTRY glWindowPos3iARB (GLint, GLint, GLint);
+GLAPI void APIENTRY glWindowPos3ivARB (const GLint *);
+GLAPI void APIENTRY glWindowPos3sARB (GLshort, GLshort, GLshort);
+GLAPI void APIENTRY glWindowPos3svARB (const GLshort *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLWINDOWPOS2DARBPROC) (GLdouble x, GLdouble y);
+typedef void (APIENTRYP PFNGLWINDOWPOS2DVARBPROC) (const GLdouble *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS2FARBPROC) (GLfloat x, GLfloat y);
+typedef void (APIENTRYP PFNGLWINDOWPOS2FVARBPROC) (const GLfloat *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS2IARBPROC) (GLint x, GLint y);
+typedef void (APIENTRYP PFNGLWINDOWPOS2IVARBPROC) (const GLint *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS2SARBPROC) (GLshort x, GLshort y);
+typedef void (APIENTRYP PFNGLWINDOWPOS2SVARBPROC) (const GLshort *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS3DARBPROC) (GLdouble x, GLdouble y, GLdouble z);
+typedef void (APIENTRYP PFNGLWINDOWPOS3DVARBPROC) (const GLdouble *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS3FARBPROC) (GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLWINDOWPOS3FVARBPROC) (const GLfloat *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS3IARBPROC) (GLint x, GLint y, GLint z);
+typedef void (APIENTRYP PFNGLWINDOWPOS3IVARBPROC) (const GLint *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS3SARBPROC) (GLshort x, GLshort y, GLshort z);
+typedef void (APIENTRYP PFNGLWINDOWPOS3SVARBPROC) (const GLshort *v);
+#endif
+
+#ifndef GL_ARB_vertex_program
+#define GL_ARB_vertex_program 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glVertexAttrib1dARB (GLuint, GLdouble);
+GLAPI void APIENTRY glVertexAttrib1dvARB (GLuint, const GLdouble *);
+GLAPI void APIENTRY glVertexAttrib1fARB (GLuint, GLfloat);
+GLAPI void APIENTRY glVertexAttrib1fvARB (GLuint, const GLfloat *);
+GLAPI void APIENTRY glVertexAttrib1sARB (GLuint, GLshort);
+GLAPI void APIENTRY glVertexAttrib1svARB (GLuint, const GLshort *);
+GLAPI void APIENTRY glVertexAttrib2dARB (GLuint, GLdouble, GLdouble);
+GLAPI void APIENTRY glVertexAttrib2dvARB (GLuint, const GLdouble *);
+GLAPI void APIENTRY glVertexAttrib2fARB (GLuint, GLfloat, GLfloat);
+GLAPI void APIENTRY glVertexAttrib2fvARB (GLuint, const GLfloat *);
+GLAPI void APIENTRY glVertexAttrib2sARB (GLuint, GLshort, GLshort);
+GLAPI void APIENTRY glVertexAttrib2svARB (GLuint, const GLshort *);
+GLAPI void APIENTRY glVertexAttrib3dARB (GLuint, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glVertexAttrib3dvARB (GLuint, const GLdouble *);
+GLAPI void APIENTRY glVertexAttrib3fARB (GLuint, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glVertexAttrib3fvARB (GLuint, const GLfloat *);
+GLAPI void APIENTRY glVertexAttrib3sARB (GLuint, GLshort, GLshort, GLshort);
+GLAPI void APIENTRY glVertexAttrib3svARB (GLuint, const GLshort *);
+GLAPI void APIENTRY glVertexAttrib4NbvARB (GLuint, const GLbyte *);
+GLAPI void APIENTRY glVertexAttrib4NivARB (GLuint, const GLint *);
+GLAPI void APIENTRY glVertexAttrib4NsvARB (GLuint, const GLshort *);
+GLAPI void APIENTRY glVertexAttrib4NubARB (GLuint, GLubyte, GLubyte, GLubyte, GLubyte);
+GLAPI void APIENTRY glVertexAttrib4NubvARB (GLuint, const GLubyte *);
+GLAPI void APIENTRY glVertexAttrib4NuivARB (GLuint, const GLuint *);
+GLAPI void APIENTRY glVertexAttrib4NusvARB (GLuint, const GLushort *);
+GLAPI void APIENTRY glVertexAttrib4bvARB (GLuint, const GLbyte *);
+GLAPI void APIENTRY glVertexAttrib4dARB (GLuint, GLdouble, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glVertexAttrib4dvARB (GLuint, const GLdouble *);
+GLAPI void APIENTRY glVertexAttrib4fARB (GLuint, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glVertexAttrib4fvARB (GLuint, const GLfloat *);
+GLAPI void APIENTRY glVertexAttrib4ivARB (GLuint, const GLint *);
+GLAPI void APIENTRY glVertexAttrib4sARB (GLuint, GLshort, GLshort, GLshort, GLshort);
+GLAPI void APIENTRY glVertexAttrib4svARB (GLuint, const GLshort *);
+GLAPI void APIENTRY glVertexAttrib4ubvARB (GLuint, const GLubyte *);
+GLAPI void APIENTRY glVertexAttrib4uivARB (GLuint, const GLuint *);
+GLAPI void APIENTRY glVertexAttrib4usvARB (GLuint, const GLushort *);
+GLAPI void APIENTRY glVertexAttribPointerARB (GLuint, GLint, GLenum, GLboolean, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glEnableVertexAttribArrayARB (GLuint);
+GLAPI void APIENTRY glDisableVertexAttribArrayARB (GLuint);
+GLAPI void APIENTRY glProgramStringARB (GLenum, GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glBindProgramARB (GLenum, GLuint);
+GLAPI void APIENTRY glDeleteProgramsARB (GLsizei, const GLuint *);
+GLAPI void APIENTRY glGenProgramsARB (GLsizei, GLuint *);
+GLAPI void APIENTRY glProgramEnvParameter4dARB (GLenum, GLuint, GLdouble, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glProgramEnvParameter4dvARB (GLenum, GLuint, const GLdouble *);
+GLAPI void APIENTRY glProgramEnvParameter4fARB (GLenum, GLuint, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glProgramEnvParameter4fvARB (GLenum, GLuint, const GLfloat *);
+GLAPI void APIENTRY glProgramLocalParameter4dARB (GLenum, GLuint, GLdouble, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glProgramLocalParameter4dvARB (GLenum, GLuint, const GLdouble *);
+GLAPI void APIENTRY glProgramLocalParameter4fARB (GLenum, GLuint, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glProgramLocalParameter4fvARB (GLenum, GLuint, const GLfloat *);
+GLAPI void APIENTRY glGetProgramEnvParameterdvARB (GLenum, GLuint, GLdouble *);
+GLAPI void APIENTRY glGetProgramEnvParameterfvARB (GLenum, GLuint, GLfloat *);
+GLAPI void APIENTRY glGetProgramLocalParameterdvARB (GLenum, GLuint, GLdouble *);
+GLAPI void APIENTRY glGetProgramLocalParameterfvARB (GLenum, GLuint, GLfloat *);
+GLAPI void APIENTRY glGetProgramivARB (GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetProgramStringARB (GLenum, GLenum, GLvoid *);
+GLAPI void APIENTRY glGetVertexAttribdvARB (GLuint, GLenum, GLdouble *);
+GLAPI void APIENTRY glGetVertexAttribfvARB (GLuint, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetVertexAttribivARB (GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glGetVertexAttribPointervARB (GLuint, GLenum, GLvoid* *);
+GLAPI GLboolean APIENTRY glIsProgramARB (GLuint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1DARBPROC) (GLuint index, GLdouble x);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1DVARBPROC) (GLuint index, const GLdouble *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1FARBPROC) (GLuint index, GLfloat x);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1FVARBPROC) (GLuint index, const GLfloat *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1SARBPROC) (GLuint index, GLshort x);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1SVARBPROC) (GLuint index, const GLshort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2DARBPROC) (GLuint index, GLdouble x, GLdouble y);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2DVARBPROC) (GLuint index, const GLdouble *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2FARBPROC) (GLuint index, GLfloat x, GLfloat y);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2FVARBPROC) (GLuint index, const GLfloat *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2SARBPROC) (GLuint index, GLshort x, GLshort y);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2SVARBPROC) (GLuint index, const GLshort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3DARBPROC) (GLuint index, GLdouble x, GLdouble y, GLdouble z);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3DVARBPROC) (GLuint index, const GLdouble *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3FARBPROC) (GLuint index, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3FVARBPROC) (GLuint index, const GLfloat *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3SARBPROC) (GLuint index, GLshort x, GLshort y, GLshort z);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3SVARBPROC) (GLuint index, const GLshort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4NBVARBPROC) (GLuint index, const GLbyte *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4NIVARBPROC) (GLuint index, const GLint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4NSVARBPROC) (GLuint index, const GLshort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4NUBARBPROC) (GLuint index, GLubyte x, GLubyte y, GLubyte z, GLubyte w);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4NUBVARBPROC) (GLuint index, const GLubyte *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4NUIVARBPROC) (GLuint index, const GLuint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4NUSVARBPROC) (GLuint index, const GLushort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4BVARBPROC) (GLuint index, const GLbyte *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4DARBPROC) (GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4DVARBPROC) (GLuint index, const GLdouble *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4FARBPROC) (GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4FVARBPROC) (GLuint index, const GLfloat *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4IVARBPROC) (GLuint index, const GLint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4SARBPROC) (GLuint index, GLshort x, GLshort y, GLshort z, GLshort w);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4SVARBPROC) (GLuint index, const GLshort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4UBVARBPROC) (GLuint index, const GLubyte *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4UIVARBPROC) (GLuint index, const GLuint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4USVARBPROC) (GLuint index, const GLushort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBPOINTERARBPROC) (GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const GLvoid *pointer);
+typedef void (APIENTRYP PFNGLENABLEVERTEXATTRIBARRAYARBPROC) (GLuint index);
+typedef void (APIENTRYP PFNGLDISABLEVERTEXATTRIBARRAYARBPROC) (GLuint index);
+typedef void (APIENTRYP PFNGLPROGRAMSTRINGARBPROC) (GLenum target, GLenum format, GLsizei len, const GLvoid *string);
+typedef void (APIENTRYP PFNGLBINDPROGRAMARBPROC) (GLenum target, GLuint program);
+typedef void (APIENTRYP PFNGLDELETEPROGRAMSARBPROC) (GLsizei n, const GLuint *programs);
+typedef void (APIENTRYP PFNGLGENPROGRAMSARBPROC) (GLsizei n, GLuint *programs);
+typedef void (APIENTRYP PFNGLPROGRAMENVPARAMETER4DARBPROC) (GLenum target, GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w);
+typedef void (APIENTRYP PFNGLPROGRAMENVPARAMETER4DVARBPROC) (GLenum target, GLuint index, const GLdouble *params);
+typedef void (APIENTRYP PFNGLPROGRAMENVPARAMETER4FARBPROC) (GLenum target, GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w);
+typedef void (APIENTRYP PFNGLPROGRAMENVPARAMETER4FVARBPROC) (GLenum target, GLuint index, const GLfloat *params);
+typedef void (APIENTRYP PFNGLPROGRAMLOCALPARAMETER4DARBPROC) (GLenum target, GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w);
+typedef void (APIENTRYP PFNGLPROGRAMLOCALPARAMETER4DVARBPROC) (GLenum target, GLuint index, const GLdouble *params);
+typedef void (APIENTRYP PFNGLPROGRAMLOCALPARAMETER4FARBPROC) (GLenum target, GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w);
+typedef void (APIENTRYP PFNGLPROGRAMLOCALPARAMETER4FVARBPROC) (GLenum target, GLuint index, const GLfloat *params);
+typedef void (APIENTRYP PFNGLGETPROGRAMENVPARAMETERDVARBPROC) (GLenum target, GLuint index, GLdouble *params);
+typedef void (APIENTRYP PFNGLGETPROGRAMENVPARAMETERFVARBPROC) (GLenum target, GLuint index, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETPROGRAMLOCALPARAMETERDVARBPROC) (GLenum target, GLuint index, GLdouble *params);
+typedef void (APIENTRYP PFNGLGETPROGRAMLOCALPARAMETERFVARBPROC) (GLenum target, GLuint index, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETPROGRAMIVARBPROC) (GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETPROGRAMSTRINGARBPROC) (GLenum target, GLenum pname, GLvoid *string);
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBDVARBPROC) (GLuint index, GLenum pname, GLdouble *params);
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBFVARBPROC) (GLuint index, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBIVARBPROC) (GLuint index, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBPOINTERVARBPROC) (GLuint index, GLenum pname, GLvoid* *pointer);
+typedef GLboolean (APIENTRYP PFNGLISPROGRAMARBPROC) (GLuint program);
+#endif
+
+#ifndef GL_ARB_fragment_program
+#define GL_ARB_fragment_program 1
+/* All ARB_fragment_program entry points are shared with ARB_vertex_program. */
+#endif
+
+#ifndef GL_ARB_vertex_buffer_object
+#define GL_ARB_vertex_buffer_object 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBindBufferARB (GLenum, GLuint);
+GLAPI void APIENTRY glDeleteBuffersARB (GLsizei, const GLuint *);
+GLAPI void APIENTRY glGenBuffersARB (GLsizei, GLuint *);
+GLAPI GLboolean APIENTRY glIsBufferARB (GLuint);
+GLAPI void APIENTRY glBufferDataARB (GLenum, GLsizeiptrARB, const GLvoid *, GLenum);
+GLAPI void APIENTRY glBufferSubDataARB (GLenum, GLintptrARB, GLsizeiptrARB, const GLvoid *);
+GLAPI void APIENTRY glGetBufferSubDataARB (GLenum, GLintptrARB, GLsizeiptrARB, GLvoid *);
+GLAPI GLvoid* APIENTRY glMapBufferARB (GLenum, GLenum);
+GLAPI GLboolean APIENTRY glUnmapBufferARB (GLenum);
+GLAPI void APIENTRY glGetBufferParameterivARB (GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetBufferPointervARB (GLenum, GLenum, GLvoid* *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLBINDBUFFERARBPROC) (GLenum target, GLuint buffer);
+typedef void (APIENTRYP PFNGLDELETEBUFFERSARBPROC) (GLsizei n, const GLuint *buffers);
+typedef void (APIENTRYP PFNGLGENBUFFERSARBPROC) (GLsizei n, GLuint *buffers);
+typedef GLboolean (APIENTRYP PFNGLISBUFFERARBPROC) (GLuint buffer);
+typedef void (APIENTRYP PFNGLBUFFERDATAARBPROC) (GLenum target, GLsizeiptrARB size, const GLvoid *data, GLenum usage);
+typedef void (APIENTRYP PFNGLBUFFERSUBDATAARBPROC) (GLenum target, GLintptrARB offset, GLsizeiptrARB size, const GLvoid *data);
+typedef void (APIENTRYP PFNGLGETBUFFERSUBDATAARBPROC) (GLenum target, GLintptrARB offset, GLsizeiptrARB size, GLvoid *data);
+typedef GLvoid* (APIENTRYP PFNGLMAPBUFFERARBPROC) (GLenum target, GLenum access);
+typedef GLboolean (APIENTRYP PFNGLUNMAPBUFFERARBPROC) (GLenum target);
+typedef void (APIENTRYP PFNGLGETBUFFERPARAMETERIVARBPROC) (GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETBUFFERPOINTERVARBPROC) (GLenum target, GLenum pname, GLvoid* *params);
+#endif
+
+#ifndef GL_ARB_occlusion_query
+#define GL_ARB_occlusion_query 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glGenQueriesARB (GLsizei, GLuint *);
+GLAPI void APIENTRY glDeleteQueriesARB (GLsizei, const GLuint *);
+GLAPI GLboolean APIENTRY glIsQueryARB (GLuint);
+GLAPI void APIENTRY glBeginQueryARB (GLenum, GLuint);
+GLAPI void APIENTRY glEndQueryARB (GLenum);
+GLAPI void APIENTRY glGetQueryivARB (GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetQueryObjectivARB (GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glGetQueryObjectuivARB (GLuint, GLenum, GLuint *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLGENQUERIESARBPROC) (GLsizei n, GLuint *ids);
+typedef void (APIENTRYP PFNGLDELETEQUERIESARBPROC) (GLsizei n, const GLuint *ids);
+typedef GLboolean (APIENTRYP PFNGLISQUERYARBPROC) (GLuint id);
+typedef void (APIENTRYP PFNGLBEGINQUERYARBPROC) (GLenum target, GLuint id);
+typedef void (APIENTRYP PFNGLENDQUERYARBPROC) (GLenum target);
+typedef void (APIENTRYP PFNGLGETQUERYIVARBPROC) (GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETQUERYOBJECTIVARBPROC) (GLuint id, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETQUERYOBJECTUIVARBPROC) (GLuint id, GLenum pname, GLuint *params);
+#endif
+
+#ifndef GL_ARB_shader_objects
+#define GL_ARB_shader_objects 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glDeleteObjectARB (GLhandleARB);
+GLAPI GLhandleARB APIENTRY glGetHandleARB (GLenum);
+GLAPI void APIENTRY glDetachObjectARB (GLhandleARB, GLhandleARB);
+GLAPI GLhandleARB APIENTRY glCreateShaderObjectARB (GLenum);
+GLAPI void APIENTRY glShaderSourceARB (GLhandleARB, GLsizei, const GLcharARB* *, const GLint *);
+GLAPI void APIENTRY glCompileShaderARB (GLhandleARB);
+GLAPI GLhandleARB APIENTRY glCreateProgramObjectARB (void);
+GLAPI void APIENTRY glAttachObjectARB (GLhandleARB, GLhandleARB);
+GLAPI void APIENTRY glLinkProgramARB (GLhandleARB);
+GLAPI void APIENTRY glUseProgramObjectARB (GLhandleARB);
+GLAPI void APIENTRY glValidateProgramARB (GLhandleARB);
+GLAPI void APIENTRY glUniform1fARB (GLint, GLfloat);
+GLAPI void APIENTRY glUniform2fARB (GLint, GLfloat, GLfloat);
+GLAPI void APIENTRY glUniform3fARB (GLint, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glUniform4fARB (GLint, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glUniform1iARB (GLint, GLint);
+GLAPI void APIENTRY glUniform2iARB (GLint, GLint, GLint);
+GLAPI void APIENTRY glUniform3iARB (GLint, GLint, GLint, GLint);
+GLAPI void APIENTRY glUniform4iARB (GLint, GLint, GLint, GLint, GLint);
+GLAPI void APIENTRY glUniform1fvARB (GLint, GLsizei, const GLfloat *);
+GLAPI void APIENTRY glUniform2fvARB (GLint, GLsizei, const GLfloat *);
+GLAPI void APIENTRY glUniform3fvARB (GLint, GLsizei, const GLfloat *);
+GLAPI void APIENTRY glUniform4fvARB (GLint, GLsizei, const GLfloat *);
+GLAPI void APIENTRY glUniform1ivARB (GLint, GLsizei, const GLint *);
+GLAPI void APIENTRY glUniform2ivARB (GLint, GLsizei, const GLint *);
+GLAPI void APIENTRY glUniform3ivARB (GLint, GLsizei, const GLint *);
+GLAPI void APIENTRY glUniform4ivARB (GLint, GLsizei, const GLint *);
+GLAPI void APIENTRY glUniformMatrix2fvARB (GLint, GLsizei, GLboolean, const GLfloat *);
+GLAPI void APIENTRY glUniformMatrix3fvARB (GLint, GLsizei, GLboolean, const GLfloat *);
+GLAPI void APIENTRY glUniformMatrix4fvARB (GLint, GLsizei, GLboolean, const GLfloat *);
+GLAPI void APIENTRY glGetObjectParameterfvARB (GLhandleARB, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetObjectParameterivARB (GLhandleARB, GLenum, GLint *);
+GLAPI void APIENTRY glGetInfoLogARB (GLhandleARB, GLsizei, GLsizei *, GLcharARB *);
+GLAPI void APIENTRY glGetAttachedObjectsARB (GLhandleARB, GLsizei, GLsizei *, GLhandleARB *);
+GLAPI GLint APIENTRY glGetUniformLocationARB (GLhandleARB, const GLcharARB *);
+GLAPI void APIENTRY glGetActiveUniformARB (GLhandleARB, GLuint, GLsizei, GLsizei *, GLint *, GLenum *, GLcharARB *);
+GLAPI void APIENTRY glGetUniformfvARB (GLhandleARB, GLint, GLfloat *);
+GLAPI void APIENTRY glGetUniformivARB (GLhandleARB, GLint, GLint *);
+GLAPI void APIENTRY glGetShaderSourceARB (GLhandleARB, GLsizei, GLsizei *, GLcharARB *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLDELETEOBJECTARBPROC) (GLhandleARB obj);
+typedef GLhandleARB (APIENTRYP PFNGLGETHANDLEARBPROC) (GLenum pname);
+typedef void (APIENTRYP PFNGLDETACHOBJECTARBPROC) (GLhandleARB containerObj, GLhandleARB attachedObj);
+typedef GLhandleARB (APIENTRYP PFNGLCREATESHADEROBJECTARBPROC) (GLenum shadertype);
+typedef void (APIENTRYP PFNGLSHADERSOURCEARBPROC) (GLhandleARB shaderObj, GLsizei count, const GLcharARB* *string, const GLint *length);
+typedef void (APIENTRYP PFNGLCOMPILESHADERARBPROC) (GLhandleARB shaderObj);
+typedef GLhandleARB (APIENTRYP PFNGLCREATEPROGRAMOBJECTARBPROC) (void);
+typedef void (APIENTRYP PFNGLATTACHOBJECTARBPROC) (GLhandleARB containerObj, GLhandleARB obj);
+typedef void (APIENTRYP PFNGLLINKPROGRAMARBPROC) (GLhandleARB programObj);
+typedef void (APIENTRYP PFNGLUSEPROGRAMOBJECTARBPROC) (GLhandleARB programObj);
+typedef void (APIENTRYP PFNGLVALIDATEPROGRAMARBPROC) (GLhandleARB programObj);
+typedef void (APIENTRYP PFNGLUNIFORM1FARBPROC) (GLint location, GLfloat v0);
+typedef void (APIENTRYP PFNGLUNIFORM2FARBPROC) (GLint location, GLfloat v0, GLfloat v1);
+typedef void (APIENTRYP PFNGLUNIFORM3FARBPROC) (GLint location, GLfloat v0, GLfloat v1, GLfloat v2);
+typedef void (APIENTRYP PFNGLUNIFORM4FARBPROC) (GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3);
+typedef void (APIENTRYP PFNGLUNIFORM1IARBPROC) (GLint location, GLint v0);
+typedef void (APIENTRYP PFNGLUNIFORM2IARBPROC) (GLint location, GLint v0, GLint v1);
+typedef void (APIENTRYP PFNGLUNIFORM3IARBPROC) (GLint location, GLint v0, GLint v1, GLint v2);
+typedef void (APIENTRYP PFNGLUNIFORM4IARBPROC) (GLint location, GLint v0, GLint v1, GLint v2, GLint v3);
+typedef void (APIENTRYP PFNGLUNIFORM1FVARBPROC) (GLint location, GLsizei count, const GLfloat *value);
+typedef void (APIENTRYP PFNGLUNIFORM2FVARBPROC) (GLint location, GLsizei count, const GLfloat *value);
+typedef void (APIENTRYP PFNGLUNIFORM3FVARBPROC) (GLint location, GLsizei count, const GLfloat *value);
+typedef void (APIENTRYP PFNGLUNIFORM4FVARBPROC) (GLint location, GLsizei count, const GLfloat *value);
+typedef void (APIENTRYP PFNGLUNIFORM1IVARBPROC) (GLint location, GLsizei count, const GLint *value);
+typedef void (APIENTRYP PFNGLUNIFORM2IVARBPROC) (GLint location, GLsizei count, const GLint *value);
+typedef void (APIENTRYP PFNGLUNIFORM3IVARBPROC) (GLint location, GLsizei count, const GLint *value);
+typedef void (APIENTRYP PFNGLUNIFORM4IVARBPROC) (GLint location, GLsizei count, const GLint *value);
+typedef void (APIENTRYP PFNGLUNIFORMMATRIX2FVARBPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+typedef void (APIENTRYP PFNGLUNIFORMMATRIX3FVARBPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+typedef void (APIENTRYP PFNGLUNIFORMMATRIX4FVARBPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+typedef void (APIENTRYP PFNGLGETOBJECTPARAMETERFVARBPROC) (GLhandleARB obj, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETOBJECTPARAMETERIVARBPROC) (GLhandleARB obj, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETINFOLOGARBPROC) (GLhandleARB obj, GLsizei maxLength, GLsizei *length, GLcharARB *infoLog);
+typedef void (APIENTRYP PFNGLGETATTACHEDOBJECTSARBPROC) (GLhandleARB containerObj, GLsizei maxcount, GLsizei *count, GLhandleARB *obj);
+typedef GLint (APIENTRYP PFNGLGETUNIFORMLOCATIONARBPROC) (GLhandleARB programObj, const GLcharARB *name);
+typedef void (APIENTRYP PFNGLGETACTIVEUNIFORMARBPROC) (GLhandleARB programObj, GLuint index, GLsizei maxLength, GLsizei *length, GLint *size, GLenum *type, GLcharARB *name);
+typedef void (APIENTRYP PFNGLGETUNIFORMFVARBPROC) (GLhandleARB programObj, GLint location, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETUNIFORMIVARBPROC) (GLhandleARB programObj, GLint location, GLint *params);
+typedef void (APIENTRYP PFNGLGETSHADERSOURCEARBPROC) (GLhandleARB obj, GLsizei maxLength, GLsizei *length, GLcharARB *source);
+#endif
+
+#ifndef GL_ARB_vertex_shader
+#define GL_ARB_vertex_shader 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBindAttribLocationARB (GLhandleARB, GLuint, const GLcharARB *);
+GLAPI void APIENTRY glGetActiveAttribARB (GLhandleARB, GLuint, GLsizei, GLsizei *, GLint *, GLenum *, GLcharARB *);
+GLAPI GLint APIENTRY glGetAttribLocationARB (GLhandleARB, const GLcharARB *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLBINDATTRIBLOCATIONARBPROC) (GLhandleARB programObj, GLuint index, const GLcharARB *name);
+typedef void (APIENTRYP PFNGLGETACTIVEATTRIBARBPROC) (GLhandleARB programObj, GLuint index, GLsizei maxLength, GLsizei *length, GLint *size, GLenum *type, GLcharARB *name);
+typedef GLint (APIENTRYP PFNGLGETATTRIBLOCATIONARBPROC) (GLhandleARB programObj, const GLcharARB *name);
+#endif
+
+#ifndef GL_ARB_fragment_shader
+#define GL_ARB_fragment_shader 1
+#endif
+
+#ifndef GL_ARB_shading_language_100
+#define GL_ARB_shading_language_100 1
+#endif
+
+#ifndef GL_ARB_texture_non_power_of_two
+#define GL_ARB_texture_non_power_of_two 1
+#endif
+
+#ifndef GL_ARB_point_sprite
+#define GL_ARB_point_sprite 1
+#endif
+
+#ifndef GL_ARB_fragment_program_shadow
+#define GL_ARB_fragment_program_shadow 1
+#endif
+
+#ifndef GL_ARB_draw_buffers
+#define GL_ARB_draw_buffers 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glDrawBuffersARB (GLsizei, const GLenum *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLDRAWBUFFERSARBPROC) (GLsizei n, const GLenum *bufs);
+#endif
+
+#ifndef GL_ARB_texture_rectangle
+#define GL_ARB_texture_rectangle 1
+#endif
+
+#ifndef GL_ARB_color_buffer_float
+#define GL_ARB_color_buffer_float 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glClampColorARB (GLenum, GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLCLAMPCOLORARBPROC) (GLenum target, GLenum clamp);
+#endif
+
+#ifndef GL_ARB_half_float_pixel
+#define GL_ARB_half_float_pixel 1
+#endif
+
+#ifndef GL_ARB_texture_float
+#define GL_ARB_texture_float 1
+#endif
+
+#ifndef GL_ARB_pixel_buffer_object
+#define GL_ARB_pixel_buffer_object 1
+#endif
+
+#ifndef GL_ARB_depth_buffer_float
+#define GL_ARB_depth_buffer_float 1
+#endif
+
+#ifndef GL_ARB_draw_instanced
+#define GL_ARB_draw_instanced 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glDrawArraysInstancedARB (GLenum, GLint, GLsizei, GLsizei);
+GLAPI void APIENTRY glDrawElementsInstancedARB (GLenum, GLsizei, GLenum, const GLvoid *, GLsizei);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLDRAWARRAYSINSTANCEDARBPROC) (GLenum mode, GLint first, GLsizei count, GLsizei primcount);
+typedef void (APIENTRYP PFNGLDRAWELEMENTSINSTANCEDARBPROC) (GLenum mode, GLsizei count, GLenum type, const GLvoid *indices, GLsizei primcount);
+#endif
+
+#ifndef GL_ARB_framebuffer_object
+#define GL_ARB_framebuffer_object 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI GLboolean APIENTRY glIsRenderbuffer (GLuint);
+GLAPI void APIENTRY glBindRenderbuffer (GLenum, GLuint);
+GLAPI void APIENTRY glDeleteRenderbuffers (GLsizei, const GLuint *);
+GLAPI void APIENTRY glGenRenderbuffers (GLsizei, GLuint *);
+GLAPI void APIENTRY glRenderbufferStorage (GLenum, GLenum, GLsizei, GLsizei);
+GLAPI void APIENTRY glGetRenderbufferParameteriv (GLenum, GLenum, GLint *);
+GLAPI GLboolean APIENTRY glIsFramebuffer (GLuint);
+GLAPI void APIENTRY glBindFramebuffer (GLenum, GLuint);
+GLAPI void APIENTRY glDeleteFramebuffers (GLsizei, const GLuint *);
+GLAPI void APIENTRY glGenFramebuffers (GLsizei, GLuint *);
+GLAPI GLenum APIENTRY glCheckFramebufferStatus (GLenum);
+GLAPI void APIENTRY glFramebufferTexture1D (GLenum, GLenum, GLenum, GLuint, GLint);
+GLAPI void APIENTRY glFramebufferTexture2D (GLenum, GLenum, GLenum, GLuint, GLint);
+GLAPI void APIENTRY glFramebufferTexture3D (GLenum, GLenum, GLenum, GLuint, GLint, GLint);
+GLAPI void APIENTRY glFramebufferRenderbuffer (GLenum, GLenum, GLenum, GLuint);
+GLAPI void APIENTRY glGetFramebufferAttachmentParameteriv (GLenum, GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGenerateMipmap (GLenum);
+GLAPI void APIENTRY glBlitFramebuffer (GLint, GLint, GLint, GLint, GLint, GLint, GLint, GLint, GLbitfield, GLenum);
+GLAPI void APIENTRY glRenderbufferStorageMultisample (GLenum, GLsizei, GLenum, GLsizei, GLsizei);
+GLAPI void APIENTRY glFramebufferTextureLayer (GLenum, GLenum, GLuint, GLint, GLint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef GLboolean (APIENTRYP PFNGLISRENDERBUFFERPROC) (GLuint renderbuffer);
+typedef void (APIENTRYP PFNGLBINDRENDERBUFFERPROC) (GLenum target, GLuint renderbuffer);
+typedef void (APIENTRYP PFNGLDELETERENDERBUFFERSPROC) (GLsizei n, const GLuint *renderbuffers);
+typedef void (APIENTRYP PFNGLGENRENDERBUFFERSPROC) (GLsizei n, GLuint *renderbuffers);
+typedef void (APIENTRYP PFNGLRENDERBUFFERSTORAGEPROC) (GLenum target, GLenum internalformat, GLsizei width, GLsizei height);
+typedef void (APIENTRYP PFNGLGETRENDERBUFFERPARAMETERIVPROC) (GLenum target, GLenum pname, GLint *params);
+typedef GLboolean (APIENTRYP PFNGLISFRAMEBUFFERPROC) (GLuint framebuffer);
+typedef void (APIENTRYP PFNGLBINDFRAMEBUFFERPROC) (GLenum target, GLuint framebuffer);
+typedef void (APIENTRYP PFNGLDELETEFRAMEBUFFERSPROC) (GLsizei n, const GLuint *framebuffers);
+typedef void (APIENTRYP PFNGLGENFRAMEBUFFERSPROC) (GLsizei n, GLuint *framebuffers);
+typedef GLenum (APIENTRYP PFNGLCHECKFRAMEBUFFERSTATUSPROC) (GLenum target);
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE1DPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE2DPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE3DPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLint zoffset);
+typedef void (APIENTRYP PFNGLFRAMEBUFFERRENDERBUFFERPROC) (GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer);
+typedef void (APIENTRYP PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC) (GLenum target, GLenum attachment, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGENERATEMIPMAPPROC) (GLenum target);
+typedef void (APIENTRYP PFNGLBLITFRAMEBUFFERPROC) (GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter);
+typedef void (APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height);
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURELAYERPROC) (GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer);
+#endif
+
+#ifndef GL_ARB_framebuffer_sRGB
+#define GL_ARB_framebuffer_sRGB 1
+#endif
+
+#ifndef GL_ARB_geometry_shader4
+#define GL_ARB_geometry_shader4 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glProgramParameteriARB (GLuint, GLenum, GLint);
+GLAPI void APIENTRY glFramebufferTextureARB (GLenum, GLenum, GLuint, GLint);
+GLAPI void APIENTRY glFramebufferTextureLayerARB (GLenum, GLenum, GLuint, GLint, GLint);
+GLAPI void APIENTRY glFramebufferTextureFaceARB (GLenum, GLenum, GLuint, GLint, GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLPROGRAMPARAMETERIARBPROC) (GLuint program, GLenum pname, GLint value);
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTUREARBPROC) (GLenum target, GLenum attachment, GLuint texture, GLint level);
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURELAYERARBPROC) (GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer);
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTUREFACEARBPROC) (GLenum target, GLenum attachment, GLuint texture, GLint level, GLenum face);
+#endif
+
+#ifndef GL_ARB_half_float_vertex
+#define GL_ARB_half_float_vertex 1
+#endif
+
+#ifndef GL_ARB_instanced_arrays
+#define GL_ARB_instanced_arrays 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glVertexAttribDivisor (GLuint, GLuint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLVERTEXATTRIBDIVISORPROC) (GLuint index, GLuint divisor);
+#endif
+
+#ifndef GL_ARB_map_buffer_range
+#define GL_ARB_map_buffer_range 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI GLvoid* APIENTRY glMapBufferRange (GLenum, GLintptr, GLsizeiptr, GLbitfield);
+GLAPI void APIENTRY glFlushMappedBufferRange (GLenum, GLintptr, GLsizeiptr);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef GLvoid* (APIENTRYP PFNGLMAPBUFFERRANGEPROC) (GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access);
+typedef void (APIENTRYP PFNGLFLUSHMAPPEDBUFFERRANGEPROC) (GLenum target, GLintptr offset, GLsizeiptr length);
+#endif
+
+#ifndef GL_ARB_texture_buffer_object
+#define GL_ARB_texture_buffer_object 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glTexBufferARB (GLenum, GLenum, GLuint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLTEXBUFFERARBPROC) (GLenum target, GLenum internalformat, GLuint buffer);
+#endif
+
+#ifndef GL_ARB_texture_compression_rgtc
+#define GL_ARB_texture_compression_rgtc 1
+#endif
+
+#ifndef GL_ARB_texture_rg
+#define GL_ARB_texture_rg 1
+#endif
+
+#ifndef GL_ARB_vertex_array_object
+#define GL_ARB_vertex_array_object 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBindVertexArray (GLuint);
+GLAPI void APIENTRY glDeleteVertexArrays (GLsizei, const GLuint *);
+GLAPI void APIENTRY glGenVertexArrays (GLsizei, GLuint *);
+GLAPI GLboolean APIENTRY glIsVertexArray (GLuint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLBINDVERTEXARRAYPROC) (GLuint array);
+typedef void (APIENTRYP PFNGLDELETEVERTEXARRAYSPROC) (GLsizei n, const GLuint *arrays);
+typedef void (APIENTRYP PFNGLGENVERTEXARRAYSPROC) (GLsizei n, GLuint *arrays);
+typedef GLboolean (APIENTRYP PFNGLISVERTEXARRAYPROC) (GLuint array);
+#endif
+
+#ifndef GL_ARB_uniform_buffer_object
+#define GL_ARB_uniform_buffer_object 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glGetUniformIndices (GLuint, GLsizei, const GLchar* *, GLuint *);
+GLAPI void APIENTRY glGetActiveUniformsiv (GLuint, GLsizei, const GLuint *, GLenum, GLint *);
+GLAPI void APIENTRY glGetActiveUniformName (GLuint, GLuint, GLsizei, GLsizei *, GLchar *);
+GLAPI GLuint APIENTRY glGetUniformBlockIndex (GLuint, const GLchar *);
+GLAPI void APIENTRY glGetActiveUniformBlockiv (GLuint, GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glGetActiveUniformBlockName (GLuint, GLuint, GLsizei, GLsizei *, GLchar *);
+GLAPI void APIENTRY glUniformBlockBinding (GLuint, GLuint, GLuint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLGETUNIFORMINDICESPROC) (GLuint program, GLsizei uniformcount, const GLchar* *uniformNames, GLuint *uniformIndices);
+typedef void (APIENTRYP PFNGLGETACTIVEUNIFORMSIVPROC) (GLuint program, GLsizei uniformcount, const GLuint *uniformIndices, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETACTIVEUNIFORMNAMEPROC) (GLuint program, GLuint uniformIndex, GLsizei bufSize, GLsizei *length, GLchar *uniformName);
+typedef GLuint (APIENTRYP PFNGLGETUNIFORMBLOCKINDEXPROC) (GLuint program, const GLchar *uniformBlockName);
+typedef void (APIENTRYP PFNGLGETACTIVEUNIFORMBLOCKIVPROC) (GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC) (GLuint program, GLuint uniformBlockIndex, GLsizei bufSize, GLsizei *length, GLchar *uniformBlockName);
+typedef void (APIENTRYP PFNGLUNIFORMBLOCKBINDINGPROC) (GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding);
+#endif
+
+#ifndef GL_ARB_compatibility
+#define GL_ARB_compatibility 1
+#endif
+
+#ifndef GL_ARB_copy_buffer
+#define GL_ARB_copy_buffer 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glCopyBufferSubData (GLenum, GLenum, GLintptr, GLintptr, GLsizeiptr);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLCOPYBUFFERSUBDATAPROC) (GLenum readTarget, GLenum writeTarget, GLintptr readoffset, GLintptr writeoffset, GLsizeiptr size);
+#endif
+
+#ifndef GL_EXT_abgr
+#define GL_EXT_abgr 1
+#endif
+
+#ifndef GL_EXT_blend_color
+#define GL_EXT_blend_color 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBlendColorEXT (GLclampf, GLclampf, GLclampf, GLclampf);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLBLENDCOLOREXTPROC) (GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha);
+#endif
+
+#ifndef GL_EXT_polygon_offset
+#define GL_EXT_polygon_offset 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glPolygonOffsetEXT (GLfloat, GLfloat);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLPOLYGONOFFSETEXTPROC) (GLfloat factor, GLfloat bias);
+#endif
+
+#ifndef GL_EXT_texture
+#define GL_EXT_texture 1
+#endif
+
+#ifndef GL_EXT_texture3D
+#define GL_EXT_texture3D 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glTexImage3DEXT (GLenum, GLint, GLenum, GLsizei, GLsizei, GLsizei, GLint, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glTexSubImage3DEXT (GLenum, GLint, GLint, GLint, GLint, GLsizei, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLTEXIMAGE3DEXTPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const GLvoid *pixels);
+typedef void (APIENTRYP PFNGLTEXSUBIMAGE3DEXTPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const GLvoid *pixels);
+#endif
+
+#ifndef GL_SGIS_texture_filter4
+#define GL_SGIS_texture_filter4 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glGetTexFilterFuncSGIS (GLenum, GLenum, GLfloat *);
+GLAPI void APIENTRY glTexFilterFuncSGIS (GLenum, GLenum, GLsizei, const GLfloat *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLGETTEXFILTERFUNCSGISPROC) (GLenum target, GLenum filter, GLfloat *weights);
+typedef void (APIENTRYP PFNGLTEXFILTERFUNCSGISPROC) (GLenum target, GLenum filter, GLsizei n, const GLfloat *weights);
+#endif
+
+#ifndef GL_EXT_subtexture
+#define GL_EXT_subtexture 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glTexSubImage1DEXT (GLenum, GLint, GLint, GLsizei, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glTexSubImage2DEXT (GLenum, GLint, GLint, GLint, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLTEXSUBIMAGE1DEXTPROC) (GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const GLvoid *pixels);
+typedef void (APIENTRYP PFNGLTEXSUBIMAGE2DEXTPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels);
+#endif
+
+#ifndef GL_EXT_copy_texture
+#define GL_EXT_copy_texture 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glCopyTexImage1DEXT (GLenum, GLint, GLenum, GLint, GLint, GLsizei, GLint);
+GLAPI void APIENTRY glCopyTexImage2DEXT (GLenum, GLint, GLenum, GLint, GLint, GLsizei, GLsizei, GLint);
+GLAPI void APIENTRY glCopyTexSubImage1DEXT (GLenum, GLint, GLint, GLint, GLint, GLsizei);
+GLAPI void APIENTRY glCopyTexSubImage2DEXT (GLenum, GLint, GLint, GLint, GLint, GLint, GLsizei, GLsizei);
+GLAPI void APIENTRY glCopyTexSubImage3DEXT (GLenum, GLint, GLint, GLint, GLint, GLint, GLint, GLsizei, GLsizei);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLCOPYTEXIMAGE1DEXTPROC) (GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLint border);
+typedef void (APIENTRYP PFNGLCOPYTEXIMAGE2DEXTPROC) (GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border);
+typedef void (APIENTRYP PFNGLCOPYTEXSUBIMAGE1DEXTPROC) (GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width);
+typedef void (APIENTRYP PFNGLCOPYTEXSUBIMAGE2DEXTPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height);
+typedef void (APIENTRYP PFNGLCOPYTEXSUBIMAGE3DEXTPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height);
+#endif
+
+#ifndef GL_EXT_histogram
+#define GL_EXT_histogram 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glGetHistogramEXT (GLenum, GLboolean, GLenum, GLenum, GLvoid *);
+GLAPI void APIENTRY glGetHistogramParameterfvEXT (GLenum, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetHistogramParameterivEXT (GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetMinmaxEXT (GLenum, GLboolean, GLenum, GLenum, GLvoid *);
+GLAPI void APIENTRY glGetMinmaxParameterfvEXT (GLenum, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetMinmaxParameterivEXT (GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glHistogramEXT (GLenum, GLsizei, GLenum, GLboolean);
+GLAPI void APIENTRY glMinmaxEXT (GLenum, GLenum, GLboolean);
+GLAPI void APIENTRY glResetHistogramEXT (GLenum);
+GLAPI void APIENTRY glResetMinmaxEXT (GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLGETHISTOGRAMEXTPROC) (GLenum target, GLboolean reset, GLenum format, GLenum type, GLvoid *values);
+typedef void (APIENTRYP PFNGLGETHISTOGRAMPARAMETERFVEXTPROC) (GLenum target, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETHISTOGRAMPARAMETERIVEXTPROC) (GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETMINMAXEXTPROC) (GLenum target, GLboolean reset, GLenum format, GLenum type, GLvoid *values);
+typedef void (APIENTRYP PFNGLGETMINMAXPARAMETERFVEXTPROC) (GLenum target, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETMINMAXPARAMETERIVEXTPROC) (GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLHISTOGRAMEXTPROC) (GLenum target, GLsizei width, GLenum internalformat, GLboolean sink);
+typedef void (APIENTRYP PFNGLMINMAXEXTPROC) (GLenum target, GLenum internalformat, GLboolean sink);
+typedef void (APIENTRYP PFNGLRESETHISTOGRAMEXTPROC) (GLenum target);
+typedef void (APIENTRYP PFNGLRESETMINMAXEXTPROC) (GLenum target);
+#endif
+
+#ifndef GL_EXT_convolution
+#define GL_EXT_convolution 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glConvolutionFilter1DEXT (GLenum, GLenum, GLsizei, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glConvolutionFilter2DEXT (GLenum, GLenum, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glConvolutionParameterfEXT (GLenum, GLenum, GLfloat);
+GLAPI void APIENTRY glConvolutionParameterfvEXT (GLenum, GLenum, const GLfloat *);
+GLAPI void APIENTRY glConvolutionParameteriEXT (GLenum, GLenum, GLint);
+GLAPI void APIENTRY glConvolutionParameterivEXT (GLenum, GLenum, const GLint *);
+GLAPI void APIENTRY glCopyConvolutionFilter1DEXT (GLenum, GLenum, GLint, GLint, GLsizei);
+GLAPI void APIENTRY glCopyConvolutionFilter2DEXT (GLenum, GLenum, GLint, GLint, GLsizei, GLsizei);
+GLAPI void APIENTRY glGetConvolutionFilterEXT (GLenum, GLenum, GLenum, GLvoid *);
+GLAPI void APIENTRY glGetConvolutionParameterfvEXT (GLenum, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetConvolutionParameterivEXT (GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetSeparableFilterEXT (GLenum, GLenum, GLenum, GLvoid *, GLvoid *, GLvoid *);
+GLAPI void APIENTRY glSeparableFilter2DEXT (GLenum, GLenum, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *, const GLvoid *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLCONVOLUTIONFILTER1DEXTPROC) (GLenum target, GLenum internalformat, GLsizei width, GLenum format, GLenum type, const GLvoid *image);
+typedef void (APIENTRYP PFNGLCONVOLUTIONFILTER2DEXTPROC) (GLenum target, GLenum internalformat, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *image);
+typedef void (APIENTRYP PFNGLCONVOLUTIONPARAMETERFEXTPROC) (GLenum target, GLenum pname, GLfloat params);
+typedef void (APIENTRYP PFNGLCONVOLUTIONPARAMETERFVEXTPROC) (GLenum target, GLenum pname, const GLfloat *params);
+typedef void (APIENTRYP PFNGLCONVOLUTIONPARAMETERIEXTPROC) (GLenum target, GLenum pname, GLint params);
+typedef void (APIENTRYP PFNGLCONVOLUTIONPARAMETERIVEXTPROC) (GLenum target, GLenum pname, const GLint *params);
+typedef void (APIENTRYP PFNGLCOPYCONVOLUTIONFILTER1DEXTPROC) (GLenum target, GLenum internalformat, GLint x, GLint y, GLsizei width);
+typedef void (APIENTRYP PFNGLCOPYCONVOLUTIONFILTER2DEXTPROC) (GLenum target, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height);
+typedef void (APIENTRYP PFNGLGETCONVOLUTIONFILTEREXTPROC) (GLenum target, GLenum format, GLenum type, GLvoid *image);
+typedef void (APIENTRYP PFNGLGETCONVOLUTIONPARAMETERFVEXTPROC) (GLenum target, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETCONVOLUTIONPARAMETERIVEXTPROC) (GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETSEPARABLEFILTEREXTPROC) (GLenum target, GLenum format, GLenum type, GLvoid *row, GLvoid *column, GLvoid *span);
+typedef void (APIENTRYP PFNGLSEPARABLEFILTER2DEXTPROC) (GLenum target, GLenum internalformat, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *row, const GLvoid *column);
+#endif
+
+#ifndef GL_SGI_color_matrix
+#define GL_SGI_color_matrix 1
+#endif
+
+#ifndef GL_SGI_color_table
+#define GL_SGI_color_table 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glColorTableSGI (GLenum, GLenum, GLsizei, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glColorTableParameterfvSGI (GLenum, GLenum, const GLfloat *);
+GLAPI void APIENTRY glColorTableParameterivSGI (GLenum, GLenum, const GLint *);
+GLAPI void APIENTRY glCopyColorTableSGI (GLenum, GLenum, GLint, GLint, GLsizei);
+GLAPI void APIENTRY glGetColorTableSGI (GLenum, GLenum, GLenum, GLvoid *);
+GLAPI void APIENTRY glGetColorTableParameterfvSGI (GLenum, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetColorTableParameterivSGI (GLenum, GLenum, GLint *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLCOLORTABLESGIPROC) (GLenum target, GLenum internalformat, GLsizei width, GLenum format, GLenum type, const GLvoid *table);
+typedef void (APIENTRYP PFNGLCOLORTABLEPARAMETERFVSGIPROC) (GLenum target, GLenum pname, const GLfloat *params);
+typedef void (APIENTRYP PFNGLCOLORTABLEPARAMETERIVSGIPROC) (GLenum target, GLenum pname, const GLint *params);
+typedef void (APIENTRYP PFNGLCOPYCOLORTABLESGIPROC) (GLenum target, GLenum internalformat, GLint x, GLint y, GLsizei width);
+typedef void (APIENTRYP PFNGLGETCOLORTABLESGIPROC) (GLenum target, GLenum format, GLenum type, GLvoid *table);
+typedef void (APIENTRYP PFNGLGETCOLORTABLEPARAMETERFVSGIPROC) (GLenum target, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETCOLORTABLEPARAMETERIVSGIPROC) (GLenum target, GLenum pname, GLint *params);
+#endif
+
+#ifndef GL_SGIX_pixel_texture
+#define GL_SGIX_pixel_texture 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glPixelTexGenSGIX (GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLPIXELTEXGENSGIXPROC) (GLenum mode);
+#endif
+
+#ifndef GL_SGIS_pixel_texture
+#define GL_SGIS_pixel_texture 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glPixelTexGenParameteriSGIS (GLenum, GLint);
+GLAPI void APIENTRY glPixelTexGenParameterivSGIS (GLenum, const GLint *);
+GLAPI void APIENTRY glPixelTexGenParameterfSGIS (GLenum, GLfloat);
+GLAPI void APIENTRY glPixelTexGenParameterfvSGIS (GLenum, const GLfloat *);
+GLAPI void APIENTRY glGetPixelTexGenParameterivSGIS (GLenum, GLint *);
+GLAPI void APIENTRY glGetPixelTexGenParameterfvSGIS (GLenum, GLfloat *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLPIXELTEXGENPARAMETERISGISPROC) (GLenum pname, GLint param);
+typedef void (APIENTRYP PFNGLPIXELTEXGENPARAMETERIVSGISPROC) (GLenum pname, const GLint *params);
+typedef void (APIENTRYP PFNGLPIXELTEXGENPARAMETERFSGISPROC) (GLenum pname, GLfloat param);
+typedef void (APIENTRYP PFNGLPIXELTEXGENPARAMETERFVSGISPROC) (GLenum pname, const GLfloat *params);
+typedef void (APIENTRYP PFNGLGETPIXELTEXGENPARAMETERIVSGISPROC) (GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETPIXELTEXGENPARAMETERFVSGISPROC) (GLenum pname, GLfloat *params);
+#endif
+
+#ifndef GL_SGIS_texture4D
+#define GL_SGIS_texture4D 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glTexImage4DSGIS (GLenum, GLint, GLenum, GLsizei, GLsizei, GLsizei, GLsizei, GLint, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glTexSubImage4DSGIS (GLenum, GLint, GLint, GLint, GLint, GLint, GLsizei, GLsizei, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLTEXIMAGE4DSGISPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLsizei size4d, GLint border, GLenum format, GLenum type, const GLvoid *pixels);
+typedef void (APIENTRYP PFNGLTEXSUBIMAGE4DSGISPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint woffset, GLsizei width, GLsizei height, GLsizei depth, GLsizei size4d, GLenum format, GLenum type, const GLvoid *pixels);
+#endif
+
+#ifndef GL_SGI_texture_color_table
+#define GL_SGI_texture_color_table 1
+#endif
+
+#ifndef GL_EXT_cmyka
+#define GL_EXT_cmyka 1
+#endif
+
+#ifndef GL_EXT_texture_object
+#define GL_EXT_texture_object 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI GLboolean APIENTRY glAreTexturesResidentEXT (GLsizei, const GLuint *, GLboolean *);
+GLAPI void APIENTRY glBindTextureEXT (GLenum, GLuint);
+GLAPI void APIENTRY glDeleteTexturesEXT (GLsizei, const GLuint *);
+GLAPI void APIENTRY glGenTexturesEXT (GLsizei, GLuint *);
+GLAPI GLboolean APIENTRY glIsTextureEXT (GLuint);
+GLAPI void APIENTRY glPrioritizeTexturesEXT (GLsizei, const GLuint *, const GLclampf *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef GLboolean (APIENTRYP PFNGLARETEXTURESRESIDENTEXTPROC) (GLsizei n, const GLuint *textures, GLboolean *residences);
+typedef void (APIENTRYP PFNGLBINDTEXTUREEXTPROC) (GLenum target, GLuint texture);
+typedef void (APIENTRYP PFNGLDELETETEXTURESEXTPROC) (GLsizei n, const GLuint *textures);
+typedef void (APIENTRYP PFNGLGENTEXTURESEXTPROC) (GLsizei n, GLuint *textures);
+typedef GLboolean (APIENTRYP PFNGLISTEXTUREEXTPROC) (GLuint texture);
+typedef void (APIENTRYP PFNGLPRIORITIZETEXTURESEXTPROC) (GLsizei n, const GLuint *textures, const GLclampf *priorities);
+#endif
+
+#ifndef GL_SGIS_detail_texture
+#define GL_SGIS_detail_texture 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glDetailTexFuncSGIS (GLenum, GLsizei, const GLfloat *);
+GLAPI void APIENTRY glGetDetailTexFuncSGIS (GLenum, GLfloat *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLDETAILTEXFUNCSGISPROC) (GLenum target, GLsizei n, const GLfloat *points);
+typedef void (APIENTRYP PFNGLGETDETAILTEXFUNCSGISPROC) (GLenum target, GLfloat *points);
+#endif
+
+#ifndef GL_SGIS_sharpen_texture
+#define GL_SGIS_sharpen_texture 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glSharpenTexFuncSGIS (GLenum, GLsizei, const GLfloat *);
+GLAPI void APIENTRY glGetSharpenTexFuncSGIS (GLenum, GLfloat *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLSHARPENTEXFUNCSGISPROC) (GLenum target, GLsizei n, const GLfloat *points);
+typedef void (APIENTRYP PFNGLGETSHARPENTEXFUNCSGISPROC) (GLenum target, GLfloat *points);
+#endif
+
+#ifndef GL_EXT_packed_pixels
+#define GL_EXT_packed_pixels 1
+#endif
+
+#ifndef GL_SGIS_texture_lod
+#define GL_SGIS_texture_lod 1
+#endif
+
+#ifndef GL_SGIS_multisample
+#define GL_SGIS_multisample 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glSampleMaskSGIS (GLclampf, GLboolean);
+GLAPI void APIENTRY glSamplePatternSGIS (GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLSAMPLEMASKSGISPROC) (GLclampf value, GLboolean invert);
+typedef void (APIENTRYP PFNGLSAMPLEPATTERNSGISPROC) (GLenum pattern);
+#endif
+
+#ifndef GL_EXT_rescale_normal
+#define GL_EXT_rescale_normal 1
+#endif
+
+#ifndef GL_EXT_vertex_array
+#define GL_EXT_vertex_array 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glArrayElementEXT (GLint);
+GLAPI void APIENTRY glColorPointerEXT (GLint, GLenum, GLsizei, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glDrawArraysEXT (GLenum, GLint, GLsizei);
+GLAPI void APIENTRY glEdgeFlagPointerEXT (GLsizei, GLsizei, const GLboolean *);
+GLAPI void APIENTRY glGetPointervEXT (GLenum, GLvoid* *);
+GLAPI void APIENTRY glIndexPointerEXT (GLenum, GLsizei, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glNormalPointerEXT (GLenum, GLsizei, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glTexCoordPointerEXT (GLint, GLenum, GLsizei, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glVertexPointerEXT (GLint, GLenum, GLsizei, GLsizei, const GLvoid *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLARRAYELEMENTEXTPROC) (GLint i);
+typedef void (APIENTRYP PFNGLCOLORPOINTEREXTPROC) (GLint size, GLenum type, GLsizei stride, GLsizei count, const GLvoid *pointer);
+typedef void (APIENTRYP PFNGLDRAWARRAYSEXTPROC) (GLenum mode, GLint first, GLsizei count);
+typedef void (APIENTRYP PFNGLEDGEFLAGPOINTEREXTPROC) (GLsizei stride, GLsizei count, const GLboolean *pointer);
+typedef void (APIENTRYP PFNGLGETPOINTERVEXTPROC) (GLenum pname, GLvoid* *params);
+typedef void (APIENTRYP PFNGLINDEXPOINTEREXTPROC) (GLenum type, GLsizei stride, GLsizei count, const GLvoid *pointer);
+typedef void (APIENTRYP PFNGLNORMALPOINTEREXTPROC) (GLenum type, GLsizei stride, GLsizei count, const GLvoid *pointer);
+typedef void (APIENTRYP PFNGLTEXCOORDPOINTEREXTPROC) (GLint size, GLenum type, GLsizei stride, GLsizei count, const GLvoid *pointer);
+typedef void (APIENTRYP PFNGLVERTEXPOINTEREXTPROC) (GLint size, GLenum type, GLsizei stride, GLsizei count, const GLvoid *pointer);
+#endif
+
+#ifndef GL_EXT_misc_attribute
+#define GL_EXT_misc_attribute 1
+#endif
+
+#ifndef GL_SGIS_generate_mipmap
+#define GL_SGIS_generate_mipmap 1
+#endif
+
+#ifndef GL_SGIX_clipmap
+#define GL_SGIX_clipmap 1
+#endif
+
+#ifndef GL_SGIX_shadow
+#define GL_SGIX_shadow 1
+#endif
+
+#ifndef GL_SGIS_texture_edge_clamp
+#define GL_SGIS_texture_edge_clamp 1
+#endif
+
+#ifndef GL_SGIS_texture_border_clamp
+#define GL_SGIS_texture_border_clamp 1
+#endif
+
+#ifndef GL_EXT_blend_minmax
+#define GL_EXT_blend_minmax 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBlendEquationEXT (GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLBLENDEQUATIONEXTPROC) (GLenum mode);
+#endif
+
+#ifndef GL_EXT_blend_subtract
+#define GL_EXT_blend_subtract 1
+#endif
+
+#ifndef GL_EXT_blend_logic_op
+#define GL_EXT_blend_logic_op 1
+#endif
+
+#ifndef GL_SGIX_interlace
+#define GL_SGIX_interlace 1
+#endif
+
+#ifndef GL_SGIX_pixel_tiles
+#define GL_SGIX_pixel_tiles 1
+#endif
+
+#ifndef GL_SGIX_texture_select
+#define GL_SGIX_texture_select 1
+#endif
+
+#ifndef GL_SGIX_sprite
+#define GL_SGIX_sprite 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glSpriteParameterfSGIX (GLenum, GLfloat);
+GLAPI void APIENTRY glSpriteParameterfvSGIX (GLenum, const GLfloat *);
+GLAPI void APIENTRY glSpriteParameteriSGIX (GLenum, GLint);
+GLAPI void APIENTRY glSpriteParameterivSGIX (GLenum, const GLint *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLSPRITEPARAMETERFSGIXPROC) (GLenum pname, GLfloat param);
+typedef void (APIENTRYP PFNGLSPRITEPARAMETERFVSGIXPROC) (GLenum pname, const GLfloat *params);
+typedef void (APIENTRYP PFNGLSPRITEPARAMETERISGIXPROC) (GLenum pname, GLint param);
+typedef void (APIENTRYP PFNGLSPRITEPARAMETERIVSGIXPROC) (GLenum pname, const GLint *params);
+#endif
+
+#ifndef GL_SGIX_texture_multi_buffer
+#define GL_SGIX_texture_multi_buffer 1
+#endif
+
+#ifndef GL_EXT_point_parameters
+#define GL_EXT_point_parameters 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glPointParameterfEXT (GLenum, GLfloat);
+GLAPI void APIENTRY glPointParameterfvEXT (GLenum, const GLfloat *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLPOINTPARAMETERFEXTPROC) (GLenum pname, GLfloat param);
+typedef void (APIENTRYP PFNGLPOINTPARAMETERFVEXTPROC) (GLenum pname, const GLfloat *params);
+#endif
+
+#ifndef GL_SGIS_point_parameters
+#define GL_SGIS_point_parameters 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glPointParameterfSGIS (GLenum, GLfloat);
+GLAPI void APIENTRY glPointParameterfvSGIS (GLenum, const GLfloat *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLPOINTPARAMETERFSGISPROC) (GLenum pname, GLfloat param);
+typedef void (APIENTRYP PFNGLPOINTPARAMETERFVSGISPROC) (GLenum pname, const GLfloat *params);
+#endif
+
+#ifndef GL_SGIX_instruments
+#define GL_SGIX_instruments 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI GLint APIENTRY glGetInstrumentsSGIX (void);
+GLAPI void APIENTRY glInstrumentsBufferSGIX (GLsizei, GLint *);
+GLAPI GLint APIENTRY glPollInstrumentsSGIX (GLint *);
+GLAPI void APIENTRY glReadInstrumentsSGIX (GLint);
+GLAPI void APIENTRY glStartInstrumentsSGIX (void);
+GLAPI void APIENTRY glStopInstrumentsSGIX (GLint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef GLint (APIENTRYP PFNGLGETINSTRUMENTSSGIXPROC) (void);
+typedef void (APIENTRYP PFNGLINSTRUMENTSBUFFERSGIXPROC) (GLsizei size, GLint *buffer);
+typedef GLint (APIENTRYP PFNGLPOLLINSTRUMENTSSGIXPROC) (GLint *marker_p);
+typedef void (APIENTRYP PFNGLREADINSTRUMENTSSGIXPROC) (GLint marker);
+typedef void (APIENTRYP PFNGLSTARTINSTRUMENTSSGIXPROC) (void);
+typedef void (APIENTRYP PFNGLSTOPINSTRUMENTSSGIXPROC) (GLint marker);
+#endif
+
+#ifndef GL_SGIX_texture_scale_bias
+#define GL_SGIX_texture_scale_bias 1
+#endif
+
+#ifndef GL_SGIX_framezoom
+#define GL_SGIX_framezoom 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glFrameZoomSGIX (GLint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLFRAMEZOOMSGIXPROC) (GLint factor);
+#endif
+
+#ifndef GL_SGIX_tag_sample_buffer
+#define GL_SGIX_tag_sample_buffer 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glTagSampleBufferSGIX (void);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLTAGSAMPLEBUFFERSGIXPROC) (void);
+#endif
+
+#ifndef GL_SGIX_polynomial_ffd
+#define GL_SGIX_polynomial_ffd 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glDeformationMap3dSGIX (GLenum, GLdouble, GLdouble, GLint, GLint, GLdouble, GLdouble, GLint, GLint, GLdouble, GLdouble, GLint, GLint, const GLdouble *);
+GLAPI void APIENTRY glDeformationMap3fSGIX (GLenum, GLfloat, GLfloat, GLint, GLint, GLfloat, GLfloat, GLint, GLint, GLfloat, GLfloat, GLint, GLint, const GLfloat *);
+GLAPI void APIENTRY glDeformSGIX (GLbitfield);
+GLAPI void APIENTRY glLoadIdentityDeformationMapSGIX (GLbitfield);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLDEFORMATIONMAP3DSGIXPROC) (GLenum target, GLdouble u1, GLdouble u2, GLint ustride, GLint uorder, GLdouble v1, GLdouble v2, GLint vstride, GLint vorder, GLdouble w1, GLdouble w2, GLint wstride, GLint worder, const GLdouble *points);
+typedef void (APIENTRYP PFNGLDEFORMATIONMAP3FSGIXPROC) (GLenum target, GLfloat u1, GLfloat u2, GLint ustride, GLint uorder, GLfloat v1, GLfloat v2, GLint vstride, GLint vorder, GLfloat w1, GLfloat w2, GLint wstride, GLint worder, const GLfloat *points);
+typedef void (APIENTRYP PFNGLDEFORMSGIXPROC) (GLbitfield mask);
+typedef void (APIENTRYP PFNGLLOADIDENTITYDEFORMATIONMAPSGIXPROC) (GLbitfield mask);
+#endif
+
+#ifndef GL_SGIX_reference_plane
+#define GL_SGIX_reference_plane 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glReferencePlaneSGIX (const GLdouble *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLREFERENCEPLANESGIXPROC) (const GLdouble *equation);
+#endif
+
+#ifndef GL_SGIX_flush_raster
+#define GL_SGIX_flush_raster 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glFlushRasterSGIX (void);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLFLUSHRASTERSGIXPROC) (void);
+#endif
+
+#ifndef GL_SGIX_depth_texture
+#define GL_SGIX_depth_texture 1
+#endif
+
+#ifndef GL_SGIS_fog_function
+#define GL_SGIS_fog_function 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glFogFuncSGIS (GLsizei, const GLfloat *);
+GLAPI void APIENTRY glGetFogFuncSGIS (GLfloat *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLFOGFUNCSGISPROC) (GLsizei n, const GLfloat *points);
+typedef void (APIENTRYP PFNGLGETFOGFUNCSGISPROC) (GLfloat *points);
+#endif
+
+#ifndef GL_SGIX_fog_offset
+#define GL_SGIX_fog_offset 1
+#endif
+
+#ifndef GL_HP_image_transform
+#define GL_HP_image_transform 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glImageTransformParameteriHP (GLenum, GLenum, GLint);
+GLAPI void APIENTRY glImageTransformParameterfHP (GLenum, GLenum, GLfloat);
+GLAPI void APIENTRY glImageTransformParameterivHP (GLenum, GLenum, const GLint *);
+GLAPI void APIENTRY glImageTransformParameterfvHP (GLenum, GLenum, const GLfloat *);
+GLAPI void APIENTRY glGetImageTransformParameterivHP (GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetImageTransformParameterfvHP (GLenum, GLenum, GLfloat *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLIMAGETRANSFORMPARAMETERIHPPROC) (GLenum target, GLenum pname, GLint param);
+typedef void (APIENTRYP PFNGLIMAGETRANSFORMPARAMETERFHPPROC) (GLenum target, GLenum pname, GLfloat param);
+typedef void (APIENTRYP PFNGLIMAGETRANSFORMPARAMETERIVHPPROC) (GLenum target, GLenum pname, const GLint *params);
+typedef void (APIENTRYP PFNGLIMAGETRANSFORMPARAMETERFVHPPROC) (GLenum target, GLenum pname, const GLfloat *params);
+typedef void (APIENTRYP PFNGLGETIMAGETRANSFORMPARAMETERIVHPPROC) (GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETIMAGETRANSFORMPARAMETERFVHPPROC) (GLenum target, GLenum pname, GLfloat *params);
+#endif
+
+#ifndef GL_HP_convolution_border_modes
+#define GL_HP_convolution_border_modes 1
+#endif
+
+#ifndef GL_SGIX_texture_add_env
+#define GL_SGIX_texture_add_env 1
+#endif
+
+#ifndef GL_EXT_color_subtable
+#define GL_EXT_color_subtable 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glColorSubTableEXT (GLenum, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glCopyColorSubTableEXT (GLenum, GLsizei, GLint, GLint, GLsizei);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLCOLORSUBTABLEEXTPROC) (GLenum target, GLsizei start, GLsizei count, GLenum format, GLenum type, const GLvoid *data);
+typedef void (APIENTRYP PFNGLCOPYCOLORSUBTABLEEXTPROC) (GLenum target, GLsizei start, GLint x, GLint y, GLsizei width);
+#endif
+
+#ifndef GL_PGI_vertex_hints
+#define GL_PGI_vertex_hints 1
+#endif
+
+#ifndef GL_PGI_misc_hints
+#define GL_PGI_misc_hints 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glHintPGI (GLenum, GLint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLHINTPGIPROC) (GLenum target, GLint mode);
+#endif
+
+#ifndef GL_EXT_paletted_texture
+#define GL_EXT_paletted_texture 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glColorTableEXT (GLenum, GLenum, GLsizei, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glGetColorTableEXT (GLenum, GLenum, GLenum, GLvoid *);
+GLAPI void APIENTRY glGetColorTableParameterivEXT (GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetColorTableParameterfvEXT (GLenum, GLenum, GLfloat *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLCOLORTABLEEXTPROC) (GLenum target, GLenum internalFormat, GLsizei width, GLenum format, GLenum type, const GLvoid *table);
+typedef void (APIENTRYP PFNGLGETCOLORTABLEEXTPROC) (GLenum target, GLenum format, GLenum type, GLvoid *data);
+typedef void (APIENTRYP PFNGLGETCOLORTABLEPARAMETERIVEXTPROC) (GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETCOLORTABLEPARAMETERFVEXTPROC) (GLenum target, GLenum pname, GLfloat *params);
+#endif
+
+#ifndef GL_EXT_clip_volume_hint
+#define GL_EXT_clip_volume_hint 1
+#endif
+
+#ifndef GL_SGIX_list_priority
+#define GL_SGIX_list_priority 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glGetListParameterfvSGIX (GLuint, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetListParameterivSGIX (GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glListParameterfSGIX (GLuint, GLenum, GLfloat);
+GLAPI void APIENTRY glListParameterfvSGIX (GLuint, GLenum, const GLfloat *);
+GLAPI void APIENTRY glListParameteriSGIX (GLuint, GLenum, GLint);
+GLAPI void APIENTRY glListParameterivSGIX (GLuint, GLenum, const GLint *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLGETLISTPARAMETERFVSGIXPROC) (GLuint list, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETLISTPARAMETERIVSGIXPROC) (GLuint list, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLLISTPARAMETERFSGIXPROC) (GLuint list, GLenum pname, GLfloat param);
+typedef void (APIENTRYP PFNGLLISTPARAMETERFVSGIXPROC) (GLuint list, GLenum pname, const GLfloat *params);
+typedef void (APIENTRYP PFNGLLISTPARAMETERISGIXPROC) (GLuint list, GLenum pname, GLint param);
+typedef void (APIENTRYP PFNGLLISTPARAMETERIVSGIXPROC) (GLuint list, GLenum pname, const GLint *params);
+#endif
+
+#ifndef GL_SGIX_ir_instrument1
+#define GL_SGIX_ir_instrument1 1
+#endif
+
+#ifndef GL_SGIX_calligraphic_fragment
+#define GL_SGIX_calligraphic_fragment 1
+#endif
+
+#ifndef GL_SGIX_texture_lod_bias
+#define GL_SGIX_texture_lod_bias 1
+#endif
+
+#ifndef GL_SGIX_shadow_ambient
+#define GL_SGIX_shadow_ambient 1
+#endif
+
+#ifndef GL_EXT_index_texture
+#define GL_EXT_index_texture 1
+#endif
+
+#ifndef GL_EXT_index_material
+#define GL_EXT_index_material 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glIndexMaterialEXT (GLenum, GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLINDEXMATERIALEXTPROC) (GLenum face, GLenum mode);
+#endif
+
+#ifndef GL_EXT_index_func
+#define GL_EXT_index_func 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glIndexFuncEXT (GLenum, GLclampf);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLINDEXFUNCEXTPROC) (GLenum func, GLclampf ref);
+#endif
+
+#ifndef GL_EXT_index_array_formats
+#define GL_EXT_index_array_formats 1
+#endif
+
+#ifndef GL_EXT_compiled_vertex_array
+#define GL_EXT_compiled_vertex_array 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glLockArraysEXT (GLint, GLsizei);
+GLAPI void APIENTRY glUnlockArraysEXT (void);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLLOCKARRAYSEXTPROC) (GLint first, GLsizei count);
+typedef void (APIENTRYP PFNGLUNLOCKARRAYSEXTPROC) (void);
+#endif
+
+#ifndef GL_EXT_cull_vertex
+#define GL_EXT_cull_vertex 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glCullParameterdvEXT (GLenum, GLdouble *);
+GLAPI void APIENTRY glCullParameterfvEXT (GLenum, GLfloat *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLCULLPARAMETERDVEXTPROC) (GLenum pname, GLdouble *params);
+typedef void (APIENTRYP PFNGLCULLPARAMETERFVEXTPROC) (GLenum pname, GLfloat *params);
+#endif
+
+#ifndef GL_SGIX_ycrcb
+#define GL_SGIX_ycrcb 1
+#endif
+
+#ifndef GL_SGIX_fragment_lighting
+#define GL_SGIX_fragment_lighting 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glFragmentColorMaterialSGIX (GLenum, GLenum);
+GLAPI void APIENTRY glFragmentLightfSGIX (GLenum, GLenum, GLfloat);
+GLAPI void APIENTRY glFragmentLightfvSGIX (GLenum, GLenum, const GLfloat *);
+GLAPI void APIENTRY glFragmentLightiSGIX (GLenum, GLenum, GLint);
+GLAPI void APIENTRY glFragmentLightivSGIX (GLenum, GLenum, const GLint *);
+GLAPI void APIENTRY glFragmentLightModelfSGIX (GLenum, GLfloat);
+GLAPI void APIENTRY glFragmentLightModelfvSGIX (GLenum, const GLfloat *);
+GLAPI void APIENTRY glFragmentLightModeliSGIX (GLenum, GLint);
+GLAPI void APIENTRY glFragmentLightModelivSGIX (GLenum, const GLint *);
+GLAPI void APIENTRY glFragmentMaterialfSGIX (GLenum, GLenum, GLfloat);
+GLAPI void APIENTRY glFragmentMaterialfvSGIX (GLenum, GLenum, const GLfloat *);
+GLAPI void APIENTRY glFragmentMaterialiSGIX (GLenum, GLenum, GLint);
+GLAPI void APIENTRY glFragmentMaterialivSGIX (GLenum, GLenum, const GLint *);
+GLAPI void APIENTRY glGetFragmentLightfvSGIX (GLenum, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetFragmentLightivSGIX (GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetFragmentMaterialfvSGIX (GLenum, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetFragmentMaterialivSGIX (GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glLightEnviSGIX (GLenum, GLint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLFRAGMENTCOLORMATERIALSGIXPROC) (GLenum face, GLenum mode);
+typedef void (APIENTRYP PFNGLFRAGMENTLIGHTFSGIXPROC) (GLenum light, GLenum pname, GLfloat param);
+typedef void (APIENTRYP PFNGLFRAGMENTLIGHTFVSGIXPROC) (GLenum light, GLenum pname, const GLfloat *params);
+typedef void (APIENTRYP PFNGLFRAGMENTLIGHTISGIXPROC) (GLenum light, GLenum pname, GLint param);
+typedef void (APIENTRYP PFNGLFRAGMENTLIGHTIVSGIXPROC) (GLenum light, GLenum pname, const GLint *params);
+typedef void (APIENTRYP PFNGLFRAGMENTLIGHTMODELFSGIXPROC) (GLenum pname, GLfloat param);
+typedef void (APIENTRYP PFNGLFRAGMENTLIGHTMODELFVSGIXPROC) (GLenum pname, const GLfloat *params);
+typedef void (APIENTRYP PFNGLFRAGMENTLIGHTMODELISGIXPROC) (GLenum pname, GLint param);
+typedef void (APIENTRYP PFNGLFRAGMENTLIGHTMODELIVSGIXPROC) (GLenum pname, const GLint *params);
+typedef void (APIENTRYP PFNGLFRAGMENTMATERIALFSGIXPROC) (GLenum face, GLenum pname, GLfloat param);
+typedef void (APIENTRYP PFNGLFRAGMENTMATERIALFVSGIXPROC) (GLenum face, GLenum pname, const GLfloat *params);
+typedef void (APIENTRYP PFNGLFRAGMENTMATERIALISGIXPROC) (GLenum face, GLenum pname, GLint param);
+typedef void (APIENTRYP PFNGLFRAGMENTMATERIALIVSGIXPROC) (GLenum face, GLenum pname, const GLint *params);
+typedef void (APIENTRYP PFNGLGETFRAGMENTLIGHTFVSGIXPROC) (GLenum light, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETFRAGMENTLIGHTIVSGIXPROC) (GLenum light, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETFRAGMENTMATERIALFVSGIXPROC) (GLenum face, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETFRAGMENTMATERIALIVSGIXPROC) (GLenum face, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLLIGHTENVISGIXPROC) (GLenum pname, GLint param);
+#endif
+
+#ifndef GL_IBM_rasterpos_clip
+#define GL_IBM_rasterpos_clip 1
+#endif
+
+#ifndef GL_HP_texture_lighting
+#define GL_HP_texture_lighting 1
+#endif
+
+#ifndef GL_EXT_draw_range_elements
+#define GL_EXT_draw_range_elements 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glDrawRangeElementsEXT (GLenum, GLuint, GLuint, GLsizei, GLenum, const GLvoid *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLDRAWRANGEELEMENTSEXTPROC) (GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices);
+#endif
+
+#ifndef GL_WIN_phong_shading
+#define GL_WIN_phong_shading 1
+#endif
+
+#ifndef GL_WIN_specular_fog
+#define GL_WIN_specular_fog 1
+#endif
+
+#ifndef GL_EXT_light_texture
+#define GL_EXT_light_texture 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glApplyTextureEXT (GLenum);
+GLAPI void APIENTRY glTextureLightEXT (GLenum);
+GLAPI void APIENTRY glTextureMaterialEXT (GLenum, GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLAPPLYTEXTUREEXTPROC) (GLenum mode);
+typedef void (APIENTRYP PFNGLTEXTURELIGHTEXTPROC) (GLenum pname);
+typedef void (APIENTRYP PFNGLTEXTUREMATERIALEXTPROC) (GLenum face, GLenum mode);
+#endif
+
+#ifndef GL_SGIX_blend_alpha_minmax
+#define GL_SGIX_blend_alpha_minmax 1
+#endif
+
+#ifndef GL_EXT_bgra
+#define GL_EXT_bgra 1
+#endif
+
+#ifndef GL_SGIX_async
+#define GL_SGIX_async 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glAsyncMarkerSGIX (GLuint);
+GLAPI GLint APIENTRY glFinishAsyncSGIX (GLuint *);
+GLAPI GLint APIENTRY glPollAsyncSGIX (GLuint *);
+GLAPI GLuint APIENTRY glGenAsyncMarkersSGIX (GLsizei);
+GLAPI void APIENTRY glDeleteAsyncMarkersSGIX (GLuint, GLsizei);
+GLAPI GLboolean APIENTRY glIsAsyncMarkerSGIX (GLuint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLASYNCMARKERSGIXPROC) (GLuint marker);
+typedef GLint (APIENTRYP PFNGLFINISHASYNCSGIXPROC) (GLuint *markerp);
+typedef GLint (APIENTRYP PFNGLPOLLASYNCSGIXPROC) (GLuint *markerp);
+typedef GLuint (APIENTRYP PFNGLGENASYNCMARKERSSGIXPROC) (GLsizei range);
+typedef void (APIENTRYP PFNGLDELETEASYNCMARKERSSGIXPROC) (GLuint marker, GLsizei range);
+typedef GLboolean (APIENTRYP PFNGLISASYNCMARKERSGIXPROC) (GLuint marker);
+#endif
+
+#ifndef GL_SGIX_async_pixel
+#define GL_SGIX_async_pixel 1
+#endif
+
+#ifndef GL_SGIX_async_histogram
+#define GL_SGIX_async_histogram 1
+#endif
+
+#ifndef GL_INTEL_parallel_arrays
+#define GL_INTEL_parallel_arrays 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glVertexPointervINTEL (GLint, GLenum, const GLvoid* *);
+GLAPI void APIENTRY glNormalPointervINTEL (GLenum, const GLvoid* *);
+GLAPI void APIENTRY glColorPointervINTEL (GLint, GLenum, const GLvoid* *);
+GLAPI void APIENTRY glTexCoordPointervINTEL (GLint, GLenum, const GLvoid* *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLVERTEXPOINTERVINTELPROC) (GLint size, GLenum type, const GLvoid* *pointer);
+typedef void (APIENTRYP PFNGLNORMALPOINTERVINTELPROC) (GLenum type, const GLvoid* *pointer);
+typedef void (APIENTRYP PFNGLCOLORPOINTERVINTELPROC) (GLint size, GLenum type, const GLvoid* *pointer);
+typedef void (APIENTRYP PFNGLTEXCOORDPOINTERVINTELPROC) (GLint size, GLenum type, const GLvoid* *pointer);
+#endif
+
+#ifndef GL_HP_occlusion_test
+#define GL_HP_occlusion_test 1
+#endif
+
+#ifndef GL_EXT_pixel_transform
+#define GL_EXT_pixel_transform 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glPixelTransformParameteriEXT (GLenum, GLenum, GLint);
+GLAPI void APIENTRY glPixelTransformParameterfEXT (GLenum, GLenum, GLfloat);
+GLAPI void APIENTRY glPixelTransformParameterivEXT (GLenum, GLenum, const GLint *);
+GLAPI void APIENTRY glPixelTransformParameterfvEXT (GLenum, GLenum, const GLfloat *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLPIXELTRANSFORMPARAMETERIEXTPROC) (GLenum target, GLenum pname, GLint param);
+typedef void (APIENTRYP PFNGLPIXELTRANSFORMPARAMETERFEXTPROC) (GLenum target, GLenum pname, GLfloat param);
+typedef void (APIENTRYP PFNGLPIXELTRANSFORMPARAMETERIVEXTPROC) (GLenum target, GLenum pname, const GLint *params);
+typedef void (APIENTRYP PFNGLPIXELTRANSFORMPARAMETERFVEXTPROC) (GLenum target, GLenum pname, const GLfloat *params);
+#endif
+
+#ifndef GL_EXT_pixel_transform_color_table
+#define GL_EXT_pixel_transform_color_table 1
+#endif
+
+#ifndef GL_EXT_shared_texture_palette
+#define GL_EXT_shared_texture_palette 1
+#endif
+
+#ifndef GL_EXT_separate_specular_color
+#define GL_EXT_separate_specular_color 1
+#endif
+
+#ifndef GL_EXT_secondary_color
+#define GL_EXT_secondary_color 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glSecondaryColor3bEXT (GLbyte, GLbyte, GLbyte);
+GLAPI void APIENTRY glSecondaryColor3bvEXT (const GLbyte *);
+GLAPI void APIENTRY glSecondaryColor3dEXT (GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glSecondaryColor3dvEXT (const GLdouble *);
+GLAPI void APIENTRY glSecondaryColor3fEXT (GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glSecondaryColor3fvEXT (const GLfloat *);
+GLAPI void APIENTRY glSecondaryColor3iEXT (GLint, GLint, GLint);
+GLAPI void APIENTRY glSecondaryColor3ivEXT (const GLint *);
+GLAPI void APIENTRY glSecondaryColor3sEXT (GLshort, GLshort, GLshort);
+GLAPI void APIENTRY glSecondaryColor3svEXT (const GLshort *);
+GLAPI void APIENTRY glSecondaryColor3ubEXT (GLubyte, GLubyte, GLubyte);
+GLAPI void APIENTRY glSecondaryColor3ubvEXT (const GLubyte *);
+GLAPI void APIENTRY glSecondaryColor3uiEXT (GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glSecondaryColor3uivEXT (const GLuint *);
+GLAPI void APIENTRY glSecondaryColor3usEXT (GLushort, GLushort, GLushort);
+GLAPI void APIENTRY glSecondaryColor3usvEXT (const GLushort *);
+GLAPI void APIENTRY glSecondaryColorPointerEXT (GLint, GLenum, GLsizei, const GLvoid *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3BEXTPROC) (GLbyte red, GLbyte green, GLbyte blue);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3BVEXTPROC) (const GLbyte *v);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3DEXTPROC) (GLdouble red, GLdouble green, GLdouble blue);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3DVEXTPROC) (const GLdouble *v);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3FEXTPROC) (GLfloat red, GLfloat green, GLfloat blue);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3FVEXTPROC) (const GLfloat *v);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3IEXTPROC) (GLint red, GLint green, GLint blue);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3IVEXTPROC) (const GLint *v);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3SEXTPROC) (GLshort red, GLshort green, GLshort blue);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3SVEXTPROC) (const GLshort *v);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3UBEXTPROC) (GLubyte red, GLubyte green, GLubyte blue);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3UBVEXTPROC) (const GLubyte *v);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3UIEXTPROC) (GLuint red, GLuint green, GLuint blue);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3UIVEXTPROC) (const GLuint *v);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3USEXTPROC) (GLushort red, GLushort green, GLushort blue);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3USVEXTPROC) (const GLushort *v);
+typedef void (APIENTRYP PFNGLSECONDARYCOLORPOINTEREXTPROC) (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer);
+#endif
+
+#ifndef GL_EXT_texture_perturb_normal
+#define GL_EXT_texture_perturb_normal 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glTextureNormalEXT (GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLTEXTURENORMALEXTPROC) (GLenum mode);
+#endif
+
+#ifndef GL_EXT_multi_draw_arrays
+#define GL_EXT_multi_draw_arrays 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glMultiDrawArraysEXT (GLenum, GLint *, GLsizei *, GLsizei);
+GLAPI void APIENTRY glMultiDrawElementsEXT (GLenum, const GLsizei *, GLenum, const GLvoid* *, GLsizei);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSEXTPROC) (GLenum mode, GLint *first, GLsizei *count, GLsizei primcount);
+typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSEXTPROC) (GLenum mode, const GLsizei *count, GLenum type, const GLvoid* *indices, GLsizei primcount);
+#endif
+
+#ifndef GL_EXT_fog_coord
+#define GL_EXT_fog_coord 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glFogCoordfEXT (GLfloat);
+GLAPI void APIENTRY glFogCoordfvEXT (const GLfloat *);
+GLAPI void APIENTRY glFogCoorddEXT (GLdouble);
+GLAPI void APIENTRY glFogCoorddvEXT (const GLdouble *);
+GLAPI void APIENTRY glFogCoordPointerEXT (GLenum, GLsizei, const GLvoid *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLFOGCOORDFEXTPROC) (GLfloat coord);
+typedef void (APIENTRYP PFNGLFOGCOORDFVEXTPROC) (const GLfloat *coord);
+typedef void (APIENTRYP PFNGLFOGCOORDDEXTPROC) (GLdouble coord);
+typedef void (APIENTRYP PFNGLFOGCOORDDVEXTPROC) (const GLdouble *coord);
+typedef void (APIENTRYP PFNGLFOGCOORDPOINTEREXTPROC) (GLenum type, GLsizei stride, const GLvoid *pointer);
+#endif
+
+#ifndef GL_REND_screen_coordinates
+#define GL_REND_screen_coordinates 1
+#endif
+
+#ifndef GL_EXT_coordinate_frame
+#define GL_EXT_coordinate_frame 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glTangent3bEXT (GLbyte, GLbyte, GLbyte);
+GLAPI void APIENTRY glTangent3bvEXT (const GLbyte *);
+GLAPI void APIENTRY glTangent3dEXT (GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glTangent3dvEXT (const GLdouble *);
+GLAPI void APIENTRY glTangent3fEXT (GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glTangent3fvEXT (const GLfloat *);
+GLAPI void APIENTRY glTangent3iEXT (GLint, GLint, GLint);
+GLAPI void APIENTRY glTangent3ivEXT (const GLint *);
+GLAPI void APIENTRY glTangent3sEXT (GLshort, GLshort, GLshort);
+GLAPI void APIENTRY glTangent3svEXT (const GLshort *);
+GLAPI void APIENTRY glBinormal3bEXT (GLbyte, GLbyte, GLbyte);
+GLAPI void APIENTRY glBinormal3bvEXT (const GLbyte *);
+GLAPI void APIENTRY glBinormal3dEXT (GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glBinormal3dvEXT (const GLdouble *);
+GLAPI void APIENTRY glBinormal3fEXT (GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glBinormal3fvEXT (const GLfloat *);
+GLAPI void APIENTRY glBinormal3iEXT (GLint, GLint, GLint);
+GLAPI void APIENTRY glBinormal3ivEXT (const GLint *);
+GLAPI void APIENTRY glBinormal3sEXT (GLshort, GLshort, GLshort);
+GLAPI void APIENTRY glBinormal3svEXT (const GLshort *);
+GLAPI void APIENTRY glTangentPointerEXT (GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glBinormalPointerEXT (GLenum, GLsizei, const GLvoid *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLTANGENT3BEXTPROC) (GLbyte tx, GLbyte ty, GLbyte tz);
+typedef void (APIENTRYP PFNGLTANGENT3BVEXTPROC) (const GLbyte *v);
+typedef void (APIENTRYP PFNGLTANGENT3DEXTPROC) (GLdouble tx, GLdouble ty, GLdouble tz);
+typedef void (APIENTRYP PFNGLTANGENT3DVEXTPROC) (const GLdouble *v);
+typedef void (APIENTRYP PFNGLTANGENT3FEXTPROC) (GLfloat tx, GLfloat ty, GLfloat tz);
+typedef void (APIENTRYP PFNGLTANGENT3FVEXTPROC) (const GLfloat *v);
+typedef void (APIENTRYP PFNGLTANGENT3IEXTPROC) (GLint tx, GLint ty, GLint tz);
+typedef void (APIENTRYP PFNGLTANGENT3IVEXTPROC) (const GLint *v);
+typedef void (APIENTRYP PFNGLTANGENT3SEXTPROC) (GLshort tx, GLshort ty, GLshort tz);
+typedef void (APIENTRYP PFNGLTANGENT3SVEXTPROC) (const GLshort *v);
+typedef void (APIENTRYP PFNGLBINORMAL3BEXTPROC) (GLbyte bx, GLbyte by, GLbyte bz);
+typedef void (APIENTRYP PFNGLBINORMAL3BVEXTPROC) (const GLbyte *v);
+typedef void (APIENTRYP PFNGLBINORMAL3DEXTPROC) (GLdouble bx, GLdouble by, GLdouble bz);
+typedef void (APIENTRYP PFNGLBINORMAL3DVEXTPROC) (const GLdouble *v);
+typedef void (APIENTRYP PFNGLBINORMAL3FEXTPROC) (GLfloat bx, GLfloat by, GLfloat bz);
+typedef void (APIENTRYP PFNGLBINORMAL3FVEXTPROC) (const GLfloat *v);
+typedef void (APIENTRYP PFNGLBINORMAL3IEXTPROC) (GLint bx, GLint by, GLint bz);
+typedef void (APIENTRYP PFNGLBINORMAL3IVEXTPROC) (const GLint *v);
+typedef void (APIENTRYP PFNGLBINORMAL3SEXTPROC) (GLshort bx, GLshort by, GLshort bz);
+typedef void (APIENTRYP PFNGLBINORMAL3SVEXTPROC) (const GLshort *v);
+typedef void (APIENTRYP PFNGLTANGENTPOINTEREXTPROC) (GLenum type, GLsizei stride, const GLvoid *pointer);
+typedef void (APIENTRYP PFNGLBINORMALPOINTEREXTPROC) (GLenum type, GLsizei stride, const GLvoid *pointer);
+#endif
+
+#ifndef GL_EXT_texture_env_combine
+#define GL_EXT_texture_env_combine 1
+#endif
+
+#ifndef GL_APPLE_specular_vector
+#define GL_APPLE_specular_vector 1
+#endif
+
+#ifndef GL_APPLE_transform_hint
+#define GL_APPLE_transform_hint 1
+#endif
+
+#ifndef GL_SGIX_fog_scale
+#define GL_SGIX_fog_scale 1
+#endif
+
+#ifndef GL_SUNX_constant_data
+#define GL_SUNX_constant_data 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glFinishTextureSUNX (void);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLFINISHTEXTURESUNXPROC) (void);
+#endif
+
+#ifndef GL_SUN_global_alpha
+#define GL_SUN_global_alpha 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glGlobalAlphaFactorbSUN (GLbyte);
+GLAPI void APIENTRY glGlobalAlphaFactorsSUN (GLshort);
+GLAPI void APIENTRY glGlobalAlphaFactoriSUN (GLint);
+GLAPI void APIENTRY glGlobalAlphaFactorfSUN (GLfloat);
+GLAPI void APIENTRY glGlobalAlphaFactordSUN (GLdouble);
+GLAPI void APIENTRY glGlobalAlphaFactorubSUN (GLubyte);
+GLAPI void APIENTRY glGlobalAlphaFactorusSUN (GLushort);
+GLAPI void APIENTRY glGlobalAlphaFactoruiSUN (GLuint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLGLOBALALPHAFACTORBSUNPROC) (GLbyte factor);
+typedef void (APIENTRYP PFNGLGLOBALALPHAFACTORSSUNPROC) (GLshort factor);
+typedef void (APIENTRYP PFNGLGLOBALALPHAFACTORISUNPROC) (GLint factor);
+typedef void (APIENTRYP PFNGLGLOBALALPHAFACTORFSUNPROC) (GLfloat factor);
+typedef void (APIENTRYP PFNGLGLOBALALPHAFACTORDSUNPROC) (GLdouble factor);
+typedef void (APIENTRYP PFNGLGLOBALALPHAFACTORUBSUNPROC) (GLubyte factor);
+typedef void (APIENTRYP PFNGLGLOBALALPHAFACTORUSSUNPROC) (GLushort factor);
+typedef void (APIENTRYP PFNGLGLOBALALPHAFACTORUISUNPROC) (GLuint factor);
+#endif
+
+#ifndef GL_SUN_triangle_list
+#define GL_SUN_triangle_list 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glReplacementCodeuiSUN (GLuint);
+GLAPI void APIENTRY glReplacementCodeusSUN (GLushort);
+GLAPI void APIENTRY glReplacementCodeubSUN (GLubyte);
+GLAPI void APIENTRY glReplacementCodeuivSUN (const GLuint *);
+GLAPI void APIENTRY glReplacementCodeusvSUN (const GLushort *);
+GLAPI void APIENTRY glReplacementCodeubvSUN (const GLubyte *);
+GLAPI void APIENTRY glReplacementCodePointerSUN (GLenum, GLsizei, const GLvoid* *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUISUNPROC) (GLuint code);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUSSUNPROC) (GLushort code);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUBSUNPROC) (GLubyte code);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUIVSUNPROC) (const GLuint *code);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUSVSUNPROC) (const GLushort *code);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUBVSUNPROC) (const GLubyte *code);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEPOINTERSUNPROC) (GLenum type, GLsizei stride, const GLvoid* *pointer);
+#endif
+
+#ifndef GL_SUN_vertex
+#define GL_SUN_vertex 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glColor4ubVertex2fSUN (GLubyte, GLubyte, GLubyte, GLubyte, GLfloat, GLfloat);
+GLAPI void APIENTRY glColor4ubVertex2fvSUN (const GLubyte *, const GLfloat *);
+GLAPI void APIENTRY glColor4ubVertex3fSUN (GLubyte, GLubyte, GLubyte, GLubyte, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glColor4ubVertex3fvSUN (const GLubyte *, const GLfloat *);
+GLAPI void APIENTRY glColor3fVertex3fSUN (GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glColor3fVertex3fvSUN (const GLfloat *, const GLfloat *);
+GLAPI void APIENTRY glNormal3fVertex3fSUN (GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glNormal3fVertex3fvSUN (const GLfloat *, const GLfloat *);
+GLAPI void APIENTRY glColor4fNormal3fVertex3fSUN (GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glColor4fNormal3fVertex3fvSUN (const GLfloat *, const GLfloat *, const GLfloat *);
+GLAPI void APIENTRY glTexCoord2fVertex3fSUN (GLfloat, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glTexCoord2fVertex3fvSUN (const GLfloat *, const GLfloat *);
+GLAPI void APIENTRY glTexCoord4fVertex4fSUN (GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glTexCoord4fVertex4fvSUN (const GLfloat *, const GLfloat *);
+GLAPI void APIENTRY glTexCoord2fColor4ubVertex3fSUN (GLfloat, GLfloat, GLubyte, GLubyte, GLubyte, GLubyte, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glTexCoord2fColor4ubVertex3fvSUN (const GLfloat *, const GLubyte *, const GLfloat *);
+GLAPI void APIENTRY glTexCoord2fColor3fVertex3fSUN (GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glTexCoord2fColor3fVertex3fvSUN (const GLfloat *, const GLfloat *, const GLfloat *);
+GLAPI void APIENTRY glTexCoord2fNormal3fVertex3fSUN (GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glTexCoord2fNormal3fVertex3fvSUN (const GLfloat *, const GLfloat *, const GLfloat *);
+GLAPI void APIENTRY glTexCoord2fColor4fNormal3fVertex3fSUN (GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glTexCoord2fColor4fNormal3fVertex3fvSUN (const GLfloat *, const GLfloat *, const GLfloat *, const GLfloat *);
+GLAPI void APIENTRY glTexCoord4fColor4fNormal3fVertex4fSUN (GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glTexCoord4fColor4fNormal3fVertex4fvSUN (const GLfloat *, const GLfloat *, const GLfloat *, const GLfloat *);
+GLAPI void APIENTRY glReplacementCodeuiVertex3fSUN (GLuint, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glReplacementCodeuiVertex3fvSUN (const GLuint *, const GLfloat *);
+GLAPI void APIENTRY glReplacementCodeuiColor4ubVertex3fSUN (GLuint, GLubyte, GLubyte, GLubyte, GLubyte, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glReplacementCodeuiColor4ubVertex3fvSUN (const GLuint *, const GLubyte *, const GLfloat *);
+GLAPI void APIENTRY glReplacementCodeuiColor3fVertex3fSUN (GLuint, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glReplacementCodeuiColor3fVertex3fvSUN (const GLuint *, const GLfloat *, const GLfloat *);
+GLAPI void APIENTRY glReplacementCodeuiNormal3fVertex3fSUN (GLuint, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glReplacementCodeuiNormal3fVertex3fvSUN (const GLuint *, const GLfloat *, const GLfloat *);
+GLAPI void APIENTRY glReplacementCodeuiColor4fNormal3fVertex3fSUN (GLuint, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glReplacementCodeuiColor4fNormal3fVertex3fvSUN (const GLuint *, const GLfloat *, const GLfloat *, const GLfloat *);
+GLAPI void APIENTRY glReplacementCodeuiTexCoord2fVertex3fSUN (GLuint, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glReplacementCodeuiTexCoord2fVertex3fvSUN (const GLuint *, const GLfloat *, const GLfloat *);
+GLAPI void APIENTRY glReplacementCodeuiTexCoord2fNormal3fVertex3fSUN (GLuint, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glReplacementCodeuiTexCoord2fNormal3fVertex3fvSUN (const GLuint *, const GLfloat *, const GLfloat *, const GLfloat *);
+GLAPI void APIENTRY glReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fSUN (GLuint, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fvSUN (const GLuint *, const GLfloat *, const GLfloat *, const GLfloat *, const GLfloat *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLCOLOR4UBVERTEX2FSUNPROC) (GLubyte r, GLubyte g, GLubyte b, GLubyte a, GLfloat x, GLfloat y);
+typedef void (APIENTRYP PFNGLCOLOR4UBVERTEX2FVSUNPROC) (const GLubyte *c, const GLfloat *v);
+typedef void (APIENTRYP PFNGLCOLOR4UBVERTEX3FSUNPROC) (GLubyte r, GLubyte g, GLubyte b, GLubyte a, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLCOLOR4UBVERTEX3FVSUNPROC) (const GLubyte *c, const GLfloat *v);
+typedef void (APIENTRYP PFNGLCOLOR3FVERTEX3FSUNPROC) (GLfloat r, GLfloat g, GLfloat b, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLCOLOR3FVERTEX3FVSUNPROC) (const GLfloat *c, const GLfloat *v);
+typedef void (APIENTRYP PFNGLNORMAL3FVERTEX3FSUNPROC) (GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLNORMAL3FVERTEX3FVSUNPROC) (const GLfloat *n, const GLfloat *v);
+typedef void (APIENTRYP PFNGLCOLOR4FNORMAL3FVERTEX3FSUNPROC) (GLfloat r, GLfloat g, GLfloat b, GLfloat a, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLCOLOR4FNORMAL3FVERTEX3FVSUNPROC) (const GLfloat *c, const GLfloat *n, const GLfloat *v);
+typedef void (APIENTRYP PFNGLTEXCOORD2FVERTEX3FSUNPROC) (GLfloat s, GLfloat t, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLTEXCOORD2FVERTEX3FVSUNPROC) (const GLfloat *tc, const GLfloat *v);
+typedef void (APIENTRYP PFNGLTEXCOORD4FVERTEX4FSUNPROC) (GLfloat s, GLfloat t, GLfloat p, GLfloat q, GLfloat x, GLfloat y, GLfloat z, GLfloat w);
+typedef void (APIENTRYP PFNGLTEXCOORD4FVERTEX4FVSUNPROC) (const GLfloat *tc, const GLfloat *v);
+typedef void (APIENTRYP PFNGLTEXCOORD2FCOLOR4UBVERTEX3FSUNPROC) (GLfloat s, GLfloat t, GLubyte r, GLubyte g, GLubyte b, GLubyte a, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLTEXCOORD2FCOLOR4UBVERTEX3FVSUNPROC) (const GLfloat *tc, const GLubyte *c, const GLfloat *v);
+typedef void (APIENTRYP PFNGLTEXCOORD2FCOLOR3FVERTEX3FSUNPROC) (GLfloat s, GLfloat t, GLfloat r, GLfloat g, GLfloat b, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLTEXCOORD2FCOLOR3FVERTEX3FVSUNPROC) (const GLfloat *tc, const GLfloat *c, const GLfloat *v);
+typedef void (APIENTRYP PFNGLTEXCOORD2FNORMAL3FVERTEX3FSUNPROC) (GLfloat s, GLfloat t, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLTEXCOORD2FNORMAL3FVERTEX3FVSUNPROC) (const GLfloat *tc, const GLfloat *n, const GLfloat *v);
+typedef void (APIENTRYP PFNGLTEXCOORD2FCOLOR4FNORMAL3FVERTEX3FSUNPROC) (GLfloat s, GLfloat t, GLfloat r, GLfloat g, GLfloat b, GLfloat a, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLTEXCOORD2FCOLOR4FNORMAL3FVERTEX3FVSUNPROC) (const GLfloat *tc, const GLfloat *c, const GLfloat *n, const GLfloat *v);
+typedef void (APIENTRYP PFNGLTEXCOORD4FCOLOR4FNORMAL3FVERTEX4FSUNPROC) (GLfloat s, GLfloat t, GLfloat p, GLfloat q, GLfloat r, GLfloat g, GLfloat b, GLfloat a, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z, GLfloat w);
+typedef void (APIENTRYP PFNGLTEXCOORD4FCOLOR4FNORMAL3FVERTEX4FVSUNPROC) (const GLfloat *tc, const GLfloat *c, const GLfloat *n, const GLfloat *v);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUIVERTEX3FSUNPROC) (GLuint rc, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUIVERTEX3FVSUNPROC) (const GLuint *rc, const GLfloat *v);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUICOLOR4UBVERTEX3FSUNPROC) (GLuint rc, GLubyte r, GLubyte g, GLubyte b, GLubyte a, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUICOLOR4UBVERTEX3FVSUNPROC) (const GLuint *rc, const GLubyte *c, const GLfloat *v);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUICOLOR3FVERTEX3FSUNPROC) (GLuint rc, GLfloat r, GLfloat g, GLfloat b, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUICOLOR3FVERTEX3FVSUNPROC) (const GLuint *rc, const GLfloat *c, const GLfloat *v);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUINORMAL3FVERTEX3FSUNPROC) (GLuint rc, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUINORMAL3FVERTEX3FVSUNPROC) (const GLuint *rc, const GLfloat *n, const GLfloat *v);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUICOLOR4FNORMAL3FVERTEX3FSUNPROC) (GLuint rc, GLfloat r, GLfloat g, GLfloat b, GLfloat a, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUICOLOR4FNORMAL3FVERTEX3FVSUNPROC) (const GLuint *rc, const GLfloat *c, const GLfloat *n, const GLfloat *v);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUITEXCOORD2FVERTEX3FSUNPROC) (GLuint rc, GLfloat s, GLfloat t, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUITEXCOORD2FVERTEX3FVSUNPROC) (const GLuint *rc, const GLfloat *tc, const GLfloat *v);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUITEXCOORD2FNORMAL3FVERTEX3FSUNPROC) (GLuint rc, GLfloat s, GLfloat t, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUITEXCOORD2FNORMAL3FVERTEX3FVSUNPROC) (const GLuint *rc, const GLfloat *tc, const GLfloat *n, const GLfloat *v);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUITEXCOORD2FCOLOR4FNORMAL3FVERTEX3FSUNPROC) (GLuint rc, GLfloat s, GLfloat t, GLfloat r, GLfloat g, GLfloat b, GLfloat a, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLREPLACEMENTCODEUITEXCOORD2FCOLOR4FNORMAL3FVERTEX3FVSUNPROC) (const GLuint *rc, const GLfloat *tc, const GLfloat *c, const GLfloat *n, const GLfloat *v);
+#endif
+
+#ifndef GL_EXT_blend_func_separate
+#define GL_EXT_blend_func_separate 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBlendFuncSeparateEXT (GLenum, GLenum, GLenum, GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLBLENDFUNCSEPARATEEXTPROC) (GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorAlpha, GLenum dfactorAlpha);
+#endif
+
+#ifndef GL_INGR_blend_func_separate
+#define GL_INGR_blend_func_separate 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBlendFuncSeparateINGR (GLenum, GLenum, GLenum, GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLBLENDFUNCSEPARATEINGRPROC) (GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorAlpha, GLenum dfactorAlpha);
+#endif
+
+#ifndef GL_INGR_color_clamp
+#define GL_INGR_color_clamp 1
+#endif
+
+#ifndef GL_INGR_interlace_read
+#define GL_INGR_interlace_read 1
+#endif
+
+#ifndef GL_EXT_stencil_wrap
+#define GL_EXT_stencil_wrap 1
+#endif
+
+#ifndef GL_EXT_422_pixels
+#define GL_EXT_422_pixels 1
+#endif
+
+#ifndef GL_NV_texgen_reflection
+#define GL_NV_texgen_reflection 1
+#endif
+
+#ifndef GL_SUN_convolution_border_modes
+#define GL_SUN_convolution_border_modes 1
+#endif
+
+#ifndef GL_EXT_texture_env_add
+#define GL_EXT_texture_env_add 1
+#endif
+
+#ifndef GL_EXT_texture_lod_bias
+#define GL_EXT_texture_lod_bias 1
+#endif
+
+#ifndef GL_EXT_texture_filter_anisotropic
+#define GL_EXT_texture_filter_anisotropic 1
+#endif
+
+#ifndef GL_EXT_vertex_weighting
+#define GL_EXT_vertex_weighting 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glVertexWeightfEXT (GLfloat);
+GLAPI void APIENTRY glVertexWeightfvEXT (const GLfloat *);
+GLAPI void APIENTRY glVertexWeightPointerEXT (GLsizei, GLenum, GLsizei, const GLvoid *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLVERTEXWEIGHTFEXTPROC) (GLfloat weight);
+typedef void (APIENTRYP PFNGLVERTEXWEIGHTFVEXTPROC) (const GLfloat *weight);
+typedef void (APIENTRYP PFNGLVERTEXWEIGHTPOINTEREXTPROC) (GLsizei size, GLenum type, GLsizei stride, const GLvoid *pointer);
+#endif
+
+#ifndef GL_NV_light_max_exponent
+#define GL_NV_light_max_exponent 1
+#endif
+
+#ifndef GL_NV_vertex_array_range
+#define GL_NV_vertex_array_range 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glFlushVertexArrayRangeNV (void);
+GLAPI void APIENTRY glVertexArrayRangeNV (GLsizei, const GLvoid *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLFLUSHVERTEXARRAYRANGENVPROC) (void);
+typedef void (APIENTRYP PFNGLVERTEXARRAYRANGENVPROC) (GLsizei length, const GLvoid *pointer);
+#endif
+
+#ifndef GL_NV_register_combiners
+#define GL_NV_register_combiners 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glCombinerParameterfvNV (GLenum, const GLfloat *);
+GLAPI void APIENTRY glCombinerParameterfNV (GLenum, GLfloat);
+GLAPI void APIENTRY glCombinerParameterivNV (GLenum, const GLint *);
+GLAPI void APIENTRY glCombinerParameteriNV (GLenum, GLint);
+GLAPI void APIENTRY glCombinerInputNV (GLenum, GLenum, GLenum, GLenum, GLenum, GLenum);
+GLAPI void APIENTRY glCombinerOutputNV (GLenum, GLenum, GLenum, GLenum, GLenum, GLenum, GLenum, GLboolean, GLboolean, GLboolean);
+GLAPI void APIENTRY glFinalCombinerInputNV (GLenum, GLenum, GLenum, GLenum);
+GLAPI void APIENTRY glGetCombinerInputParameterfvNV (GLenum, GLenum, GLenum, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetCombinerInputParameterivNV (GLenum, GLenum, GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetCombinerOutputParameterfvNV (GLenum, GLenum, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetCombinerOutputParameterivNV (GLenum, GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetFinalCombinerInputParameterfvNV (GLenum, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetFinalCombinerInputParameterivNV (GLenum, GLenum, GLint *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLCOMBINERPARAMETERFVNVPROC) (GLenum pname, const GLfloat *params);
+typedef void (APIENTRYP PFNGLCOMBINERPARAMETERFNVPROC) (GLenum pname, GLfloat param);
+typedef void (APIENTRYP PFNGLCOMBINERPARAMETERIVNVPROC) (GLenum pname, const GLint *params);
+typedef void (APIENTRYP PFNGLCOMBINERPARAMETERINVPROC) (GLenum pname, GLint param);
+typedef void (APIENTRYP PFNGLCOMBINERINPUTNVPROC) (GLenum stage, GLenum portion, GLenum variable, GLenum input, GLenum mapping, GLenum componentUsage);
+typedef void (APIENTRYP PFNGLCOMBINEROUTPUTNVPROC) (GLenum stage, GLenum portion, GLenum abOutput, GLenum cdOutput, GLenum sumOutput, GLenum scale, GLenum bias, GLboolean abDotProduct, GLboolean cdDotProduct, GLboolean muxSum);
+typedef void (APIENTRYP PFNGLFINALCOMBINERINPUTNVPROC) (GLenum variable, GLenum input, GLenum mapping, GLenum componentUsage);
+typedef void (APIENTRYP PFNGLGETCOMBINERINPUTPARAMETERFVNVPROC) (GLenum stage, GLenum portion, GLenum variable, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETCOMBINERINPUTPARAMETERIVNVPROC) (GLenum stage, GLenum portion, GLenum variable, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETCOMBINEROUTPUTPARAMETERFVNVPROC) (GLenum stage, GLenum portion, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETCOMBINEROUTPUTPARAMETERIVNVPROC) (GLenum stage, GLenum portion, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETFINALCOMBINERINPUTPARAMETERFVNVPROC) (GLenum variable, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETFINALCOMBINERINPUTPARAMETERIVNVPROC) (GLenum variable, GLenum pname, GLint *params);
+#endif
+
+#ifndef GL_NV_fog_distance
+#define GL_NV_fog_distance 1
+#endif
+
+#ifndef GL_NV_texgen_emboss
+#define GL_NV_texgen_emboss 1
+#endif
+
+#ifndef GL_NV_blend_square
+#define GL_NV_blend_square 1
+#endif
+
+#ifndef GL_NV_texture_env_combine4
+#define GL_NV_texture_env_combine4 1
+#endif
+
+#ifndef GL_MESA_resize_buffers
+#define GL_MESA_resize_buffers 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glResizeBuffersMESA (void);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLRESIZEBUFFERSMESAPROC) (void);
+#endif
+
+#ifndef GL_MESA_window_pos
+#define GL_MESA_window_pos 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glWindowPos2dMESA (GLdouble, GLdouble);
+GLAPI void APIENTRY glWindowPos2dvMESA (const GLdouble *);
+GLAPI void APIENTRY glWindowPos2fMESA (GLfloat, GLfloat);
+GLAPI void APIENTRY glWindowPos2fvMESA (const GLfloat *);
+GLAPI void APIENTRY glWindowPos2iMESA (GLint, GLint);
+GLAPI void APIENTRY glWindowPos2ivMESA (const GLint *);
+GLAPI void APIENTRY glWindowPos2sMESA (GLshort, GLshort);
+GLAPI void APIENTRY glWindowPos2svMESA (const GLshort *);
+GLAPI void APIENTRY glWindowPos3dMESA (GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glWindowPos3dvMESA (const GLdouble *);
+GLAPI void APIENTRY glWindowPos3fMESA (GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glWindowPos3fvMESA (const GLfloat *);
+GLAPI void APIENTRY glWindowPos3iMESA (GLint, GLint, GLint);
+GLAPI void APIENTRY glWindowPos3ivMESA (const GLint *);
+GLAPI void APIENTRY glWindowPos3sMESA (GLshort, GLshort, GLshort);
+GLAPI void APIENTRY glWindowPos3svMESA (const GLshort *);
+GLAPI void APIENTRY glWindowPos4dMESA (GLdouble, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glWindowPos4dvMESA (const GLdouble *);
+GLAPI void APIENTRY glWindowPos4fMESA (GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glWindowPos4fvMESA (const GLfloat *);
+GLAPI void APIENTRY glWindowPos4iMESA (GLint, GLint, GLint, GLint);
+GLAPI void APIENTRY glWindowPos4ivMESA (const GLint *);
+GLAPI void APIENTRY glWindowPos4sMESA (GLshort, GLshort, GLshort, GLshort);
+GLAPI void APIENTRY glWindowPos4svMESA (const GLshort *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLWINDOWPOS2DMESAPROC) (GLdouble x, GLdouble y);
+typedef void (APIENTRYP PFNGLWINDOWPOS2DVMESAPROC) (const GLdouble *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS2FMESAPROC) (GLfloat x, GLfloat y);
+typedef void (APIENTRYP PFNGLWINDOWPOS2FVMESAPROC) (const GLfloat *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS2IMESAPROC) (GLint x, GLint y);
+typedef void (APIENTRYP PFNGLWINDOWPOS2IVMESAPROC) (const GLint *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS2SMESAPROC) (GLshort x, GLshort y);
+typedef void (APIENTRYP PFNGLWINDOWPOS2SVMESAPROC) (const GLshort *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS3DMESAPROC) (GLdouble x, GLdouble y, GLdouble z);
+typedef void (APIENTRYP PFNGLWINDOWPOS3DVMESAPROC) (const GLdouble *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS3FMESAPROC) (GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLWINDOWPOS3FVMESAPROC) (const GLfloat *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS3IMESAPROC) (GLint x, GLint y, GLint z);
+typedef void (APIENTRYP PFNGLWINDOWPOS3IVMESAPROC) (const GLint *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS3SMESAPROC) (GLshort x, GLshort y, GLshort z);
+typedef void (APIENTRYP PFNGLWINDOWPOS3SVMESAPROC) (const GLshort *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS4DMESAPROC) (GLdouble x, GLdouble y, GLdouble z, GLdouble w);
+typedef void (APIENTRYP PFNGLWINDOWPOS4DVMESAPROC) (const GLdouble *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS4FMESAPROC) (GLfloat x, GLfloat y, GLfloat z, GLfloat w);
+typedef void (APIENTRYP PFNGLWINDOWPOS4FVMESAPROC) (const GLfloat *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS4IMESAPROC) (GLint x, GLint y, GLint z, GLint w);
+typedef void (APIENTRYP PFNGLWINDOWPOS4IVMESAPROC) (const GLint *v);
+typedef void (APIENTRYP PFNGLWINDOWPOS4SMESAPROC) (GLshort x, GLshort y, GLshort z, GLshort w);
+typedef void (APIENTRYP PFNGLWINDOWPOS4SVMESAPROC) (const GLshort *v);
+#endif
+
+#ifndef GL_IBM_cull_vertex
+#define GL_IBM_cull_vertex 1
+#endif
+
+#ifndef GL_IBM_multimode_draw_arrays
+#define GL_IBM_multimode_draw_arrays 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glMultiModeDrawArraysIBM (const GLenum *, const GLint *, const GLsizei *, GLsizei, GLint);
+GLAPI void APIENTRY glMultiModeDrawElementsIBM (const GLenum *, const GLsizei *, GLenum, const GLvoid* const *, GLsizei, GLint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLMULTIMODEDRAWARRAYSIBMPROC) (const GLenum *mode, const GLint *first, const GLsizei *count, GLsizei primcount, GLint modestride);
+typedef void (APIENTRYP PFNGLMULTIMODEDRAWELEMENTSIBMPROC) (const GLenum *mode, const GLsizei *count, GLenum type, const GLvoid* const *indices, GLsizei primcount, GLint modestride);
+#endif
+
+#ifndef GL_IBM_vertex_array_lists
+#define GL_IBM_vertex_array_lists 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glColorPointerListIBM (GLint, GLenum, GLint, const GLvoid* *, GLint);
+GLAPI void APIENTRY glSecondaryColorPointerListIBM (GLint, GLenum, GLint, const GLvoid* *, GLint);
+GLAPI void APIENTRY glEdgeFlagPointerListIBM (GLint, const GLboolean* *, GLint);
+GLAPI void APIENTRY glFogCoordPointerListIBM (GLenum, GLint, const GLvoid* *, GLint);
+GLAPI void APIENTRY glIndexPointerListIBM (GLenum, GLint, const GLvoid* *, GLint);
+GLAPI void APIENTRY glNormalPointerListIBM (GLenum, GLint, const GLvoid* *, GLint);
+GLAPI void APIENTRY glTexCoordPointerListIBM (GLint, GLenum, GLint, const GLvoid* *, GLint);
+GLAPI void APIENTRY glVertexPointerListIBM (GLint, GLenum, GLint, const GLvoid* *, GLint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLCOLORPOINTERLISTIBMPROC) (GLint size, GLenum type, GLint stride, const GLvoid* *pointer, GLint ptrstride);
+typedef void (APIENTRYP PFNGLSECONDARYCOLORPOINTERLISTIBMPROC) (GLint size, GLenum type, GLint stride, const GLvoid* *pointer, GLint ptrstride);
+typedef void (APIENTRYP PFNGLEDGEFLAGPOINTERLISTIBMPROC) (GLint stride, const GLboolean* *pointer, GLint ptrstride);
+typedef void (APIENTRYP PFNGLFOGCOORDPOINTERLISTIBMPROC) (GLenum type, GLint stride, const GLvoid* *pointer, GLint ptrstride);
+typedef void (APIENTRYP PFNGLINDEXPOINTERLISTIBMPROC) (GLenum type, GLint stride, const GLvoid* *pointer, GLint ptrstride);
+typedef void (APIENTRYP PFNGLNORMALPOINTERLISTIBMPROC) (GLenum type, GLint stride, const GLvoid* *pointer, GLint ptrstride);
+typedef void (APIENTRYP PFNGLTEXCOORDPOINTERLISTIBMPROC) (GLint size, GLenum type, GLint stride, const GLvoid* *pointer, GLint ptrstride);
+typedef void (APIENTRYP PFNGLVERTEXPOINTERLISTIBMPROC) (GLint size, GLenum type, GLint stride, const GLvoid* *pointer, GLint ptrstride);
+#endif
+
+#ifndef GL_SGIX_subsample
+#define GL_SGIX_subsample 1
+#endif
+
+#ifndef GL_SGIX_ycrcba
+#define GL_SGIX_ycrcba 1
+#endif
+
+#ifndef GL_SGIX_ycrcb_subsample
+#define GL_SGIX_ycrcb_subsample 1
+#endif
+
+#ifndef GL_SGIX_depth_pass_instrument
+#define GL_SGIX_depth_pass_instrument 1
+#endif
+
+#ifndef GL_3DFX_texture_compression_FXT1
+#define GL_3DFX_texture_compression_FXT1 1
+#endif
+
+#ifndef GL_3DFX_multisample
+#define GL_3DFX_multisample 1
+#endif
+
+#ifndef GL_3DFX_tbuffer
+#define GL_3DFX_tbuffer 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glTbufferMask3DFX (GLuint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLTBUFFERMASK3DFXPROC) (GLuint mask);
+#endif
+
+#ifndef GL_EXT_multisample
+#define GL_EXT_multisample 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glSampleMaskEXT (GLclampf, GLboolean);
+GLAPI void APIENTRY glSamplePatternEXT (GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLSAMPLEMASKEXTPROC) (GLclampf value, GLboolean invert);
+typedef void (APIENTRYP PFNGLSAMPLEPATTERNEXTPROC) (GLenum pattern);
+#endif
+
+#ifndef GL_SGIX_vertex_preclip
+#define GL_SGIX_vertex_preclip 1
+#endif
+
+#ifndef GL_SGIX_convolution_accuracy
+#define GL_SGIX_convolution_accuracy 1
+#endif
+
+#ifndef GL_SGIX_resample
+#define GL_SGIX_resample 1
+#endif
+
+#ifndef GL_SGIS_point_line_texgen
+#define GL_SGIS_point_line_texgen 1
+#endif
+
+#ifndef GL_SGIS_texture_color_mask
+#define GL_SGIS_texture_color_mask 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glTextureColorMaskSGIS (GLboolean, GLboolean, GLboolean, GLboolean);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLTEXTURECOLORMASKSGISPROC) (GLboolean red, GLboolean green, GLboolean blue, GLboolean alpha);
+#endif
+
+#ifndef GL_SGIX_igloo_interface
+#define GL_SGIX_igloo_interface 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glIglooInterfaceSGIX (GLenum, const GLvoid *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLIGLOOINTERFACESGIXPROC) (GLenum pname, const GLvoid *params);
+#endif
+
+#ifndef GL_EXT_texture_env_dot3
+#define GL_EXT_texture_env_dot3 1
+#endif
+
+#ifndef GL_ATI_texture_mirror_once
+#define GL_ATI_texture_mirror_once 1
+#endif
+
+#ifndef GL_NV_fence
+#define GL_NV_fence 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glDeleteFencesNV (GLsizei, const GLuint *);
+GLAPI void APIENTRY glGenFencesNV (GLsizei, GLuint *);
+GLAPI GLboolean APIENTRY glIsFenceNV (GLuint);
+GLAPI GLboolean APIENTRY glTestFenceNV (GLuint);
+GLAPI void APIENTRY glGetFenceivNV (GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glFinishFenceNV (GLuint);
+GLAPI void APIENTRY glSetFenceNV (GLuint, GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLDELETEFENCESNVPROC) (GLsizei n, const GLuint *fences);
+typedef void (APIENTRYP PFNGLGENFENCESNVPROC) (GLsizei n, GLuint *fences);
+typedef GLboolean (APIENTRYP PFNGLISFENCENVPROC) (GLuint fence);
+typedef GLboolean (APIENTRYP PFNGLTESTFENCENVPROC) (GLuint fence);
+typedef void (APIENTRYP PFNGLGETFENCEIVNVPROC) (GLuint fence, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLFINISHFENCENVPROC) (GLuint fence);
+typedef void (APIENTRYP PFNGLSETFENCENVPROC) (GLuint fence, GLenum condition);
+#endif
+
+#ifndef GL_NV_evaluators
+#define GL_NV_evaluators 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glMapControlPointsNV (GLenum, GLuint, GLenum, GLsizei, GLsizei, GLint, GLint, GLboolean, const GLvoid *);
+GLAPI void APIENTRY glMapParameterivNV (GLenum, GLenum, const GLint *);
+GLAPI void APIENTRY glMapParameterfvNV (GLenum, GLenum, const GLfloat *);
+GLAPI void APIENTRY glGetMapControlPointsNV (GLenum, GLuint, GLenum, GLsizei, GLsizei, GLboolean, GLvoid *);
+GLAPI void APIENTRY glGetMapParameterivNV (GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetMapParameterfvNV (GLenum, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetMapAttribParameterivNV (GLenum, GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glGetMapAttribParameterfvNV (GLenum, GLuint, GLenum, GLfloat *);
+GLAPI void APIENTRY glEvalMapsNV (GLenum, GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLMAPCONTROLPOINTSNVPROC) (GLenum target, GLuint index, GLenum type, GLsizei ustride, GLsizei vstride, GLint uorder, GLint vorder, GLboolean packed, const GLvoid *points);
+typedef void (APIENTRYP PFNGLMAPPARAMETERIVNVPROC) (GLenum target, GLenum pname, const GLint *params);
+typedef void (APIENTRYP PFNGLMAPPARAMETERFVNVPROC) (GLenum target, GLenum pname, const GLfloat *params);
+typedef void (APIENTRYP PFNGLGETMAPCONTROLPOINTSNVPROC) (GLenum target, GLuint index, GLenum type, GLsizei ustride, GLsizei vstride, GLboolean packed, GLvoid *points);
+typedef void (APIENTRYP PFNGLGETMAPPARAMETERIVNVPROC) (GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETMAPPARAMETERFVNVPROC) (GLenum target, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETMAPATTRIBPARAMETERIVNVPROC) (GLenum target, GLuint index, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETMAPATTRIBPARAMETERFVNVPROC) (GLenum target, GLuint index, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLEVALMAPSNVPROC) (GLenum target, GLenum mode);
+#endif
+
+#ifndef GL_NV_packed_depth_stencil
+#define GL_NV_packed_depth_stencil 1
+#endif
+
+#ifndef GL_NV_register_combiners2
+#define GL_NV_register_combiners2 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glCombinerStageParameterfvNV (GLenum, GLenum, const GLfloat *);
+GLAPI void APIENTRY glGetCombinerStageParameterfvNV (GLenum, GLenum, GLfloat *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLCOMBINERSTAGEPARAMETERFVNVPROC) (GLenum stage, GLenum pname, const GLfloat *params);
+typedef void (APIENTRYP PFNGLGETCOMBINERSTAGEPARAMETERFVNVPROC) (GLenum stage, GLenum pname, GLfloat *params);
+#endif
+
+#ifndef GL_NV_texture_compression_vtc
+#define GL_NV_texture_compression_vtc 1
+#endif
+
+#ifndef GL_NV_texture_rectangle
+#define GL_NV_texture_rectangle 1
+#endif
+
+#ifndef GL_NV_texture_shader
+#define GL_NV_texture_shader 1
+#endif
+
+#ifndef GL_NV_texture_shader2
+#define GL_NV_texture_shader2 1
+#endif
+
+#ifndef GL_NV_vertex_array_range2
+#define GL_NV_vertex_array_range2 1
+#endif
+
+#ifndef GL_NV_vertex_program
+#define GL_NV_vertex_program 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI GLboolean APIENTRY glAreProgramsResidentNV (GLsizei, const GLuint *, GLboolean *);
+GLAPI void APIENTRY glBindProgramNV (GLenum, GLuint);
+GLAPI void APIENTRY glDeleteProgramsNV (GLsizei, const GLuint *);
+GLAPI void APIENTRY glExecuteProgramNV (GLenum, GLuint, const GLfloat *);
+GLAPI void APIENTRY glGenProgramsNV (GLsizei, GLuint *);
+GLAPI void APIENTRY glGetProgramParameterdvNV (GLenum, GLuint, GLenum, GLdouble *);
+GLAPI void APIENTRY glGetProgramParameterfvNV (GLenum, GLuint, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetProgramivNV (GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glGetProgramStringNV (GLuint, GLenum, GLubyte *);
+GLAPI void APIENTRY glGetTrackMatrixivNV (GLenum, GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glGetVertexAttribdvNV (GLuint, GLenum, GLdouble *);
+GLAPI void APIENTRY glGetVertexAttribfvNV (GLuint, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetVertexAttribivNV (GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glGetVertexAttribPointervNV (GLuint, GLenum, GLvoid* *);
+GLAPI GLboolean APIENTRY glIsProgramNV (GLuint);
+GLAPI void APIENTRY glLoadProgramNV (GLenum, GLuint, GLsizei, const GLubyte *);
+GLAPI void APIENTRY glProgramParameter4dNV (GLenum, GLuint, GLdouble, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glProgramParameter4dvNV (GLenum, GLuint, const GLdouble *);
+GLAPI void APIENTRY glProgramParameter4fNV (GLenum, GLuint, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glProgramParameter4fvNV (GLenum, GLuint, const GLfloat *);
+GLAPI void APIENTRY glProgramParameters4dvNV (GLenum, GLuint, GLuint, const GLdouble *);
+GLAPI void APIENTRY glProgramParameters4fvNV (GLenum, GLuint, GLuint, const GLfloat *);
+GLAPI void APIENTRY glRequestResidentProgramsNV (GLsizei, const GLuint *);
+GLAPI void APIENTRY glTrackMatrixNV (GLenum, GLuint, GLenum, GLenum);
+GLAPI void APIENTRY glVertexAttribPointerNV (GLuint, GLint, GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glVertexAttrib1dNV (GLuint, GLdouble);
+GLAPI void APIENTRY glVertexAttrib1dvNV (GLuint, const GLdouble *);
+GLAPI void APIENTRY glVertexAttrib1fNV (GLuint, GLfloat);
+GLAPI void APIENTRY glVertexAttrib1fvNV (GLuint, const GLfloat *);
+GLAPI void APIENTRY glVertexAttrib1sNV (GLuint, GLshort);
+GLAPI void APIENTRY glVertexAttrib1svNV (GLuint, const GLshort *);
+GLAPI void APIENTRY glVertexAttrib2dNV (GLuint, GLdouble, GLdouble);
+GLAPI void APIENTRY glVertexAttrib2dvNV (GLuint, const GLdouble *);
+GLAPI void APIENTRY glVertexAttrib2fNV (GLuint, GLfloat, GLfloat);
+GLAPI void APIENTRY glVertexAttrib2fvNV (GLuint, const GLfloat *);
+GLAPI void APIENTRY glVertexAttrib2sNV (GLuint, GLshort, GLshort);
+GLAPI void APIENTRY glVertexAttrib2svNV (GLuint, const GLshort *);
+GLAPI void APIENTRY glVertexAttrib3dNV (GLuint, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glVertexAttrib3dvNV (GLuint, const GLdouble *);
+GLAPI void APIENTRY glVertexAttrib3fNV (GLuint, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glVertexAttrib3fvNV (GLuint, const GLfloat *);
+GLAPI void APIENTRY glVertexAttrib3sNV (GLuint, GLshort, GLshort, GLshort);
+GLAPI void APIENTRY glVertexAttrib3svNV (GLuint, const GLshort *);
+GLAPI void APIENTRY glVertexAttrib4dNV (GLuint, GLdouble, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glVertexAttrib4dvNV (GLuint, const GLdouble *);
+GLAPI void APIENTRY glVertexAttrib4fNV (GLuint, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glVertexAttrib4fvNV (GLuint, const GLfloat *);
+GLAPI void APIENTRY glVertexAttrib4sNV (GLuint, GLshort, GLshort, GLshort, GLshort);
+GLAPI void APIENTRY glVertexAttrib4svNV (GLuint, const GLshort *);
+GLAPI void APIENTRY glVertexAttrib4ubNV (GLuint, GLubyte, GLubyte, GLubyte, GLubyte);
+GLAPI void APIENTRY glVertexAttrib4ubvNV (GLuint, const GLubyte *);
+GLAPI void APIENTRY glVertexAttribs1dvNV (GLuint, GLsizei, const GLdouble *);
+GLAPI void APIENTRY glVertexAttribs1fvNV (GLuint, GLsizei, const GLfloat *);
+GLAPI void APIENTRY glVertexAttribs1svNV (GLuint, GLsizei, const GLshort *);
+GLAPI void APIENTRY glVertexAttribs2dvNV (GLuint, GLsizei, const GLdouble *);
+GLAPI void APIENTRY glVertexAttribs2fvNV (GLuint, GLsizei, const GLfloat *);
+GLAPI void APIENTRY glVertexAttribs2svNV (GLuint, GLsizei, const GLshort *);
+GLAPI void APIENTRY glVertexAttribs3dvNV (GLuint, GLsizei, const GLdouble *);
+GLAPI void APIENTRY glVertexAttribs3fvNV (GLuint, GLsizei, const GLfloat *);
+GLAPI void APIENTRY glVertexAttribs3svNV (GLuint, GLsizei, const GLshort *);
+GLAPI void APIENTRY glVertexAttribs4dvNV (GLuint, GLsizei, const GLdouble *);
+GLAPI void APIENTRY glVertexAttribs4fvNV (GLuint, GLsizei, const GLfloat *);
+GLAPI void APIENTRY glVertexAttribs4svNV (GLuint, GLsizei, const GLshort *);
+GLAPI void APIENTRY glVertexAttribs4ubvNV (GLuint, GLsizei, const GLubyte *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef GLboolean (APIENTRYP PFNGLAREPROGRAMSRESIDENTNVPROC) (GLsizei n, const GLuint *programs, GLboolean *residences);
+typedef void (APIENTRYP PFNGLBINDPROGRAMNVPROC) (GLenum target, GLuint id);
+typedef void (APIENTRYP PFNGLDELETEPROGRAMSNVPROC) (GLsizei n, const GLuint *programs);
+typedef void (APIENTRYP PFNGLEXECUTEPROGRAMNVPROC) (GLenum target, GLuint id, const GLfloat *params);
+typedef void (APIENTRYP PFNGLGENPROGRAMSNVPROC) (GLsizei n, GLuint *programs);
+typedef void (APIENTRYP PFNGLGETPROGRAMPARAMETERDVNVPROC) (GLenum target, GLuint index, GLenum pname, GLdouble *params);
+typedef void (APIENTRYP PFNGLGETPROGRAMPARAMETERFVNVPROC) (GLenum target, GLuint index, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETPROGRAMIVNVPROC) (GLuint id, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETPROGRAMSTRINGNVPROC) (GLuint id, GLenum pname, GLubyte *program);
+typedef void (APIENTRYP PFNGLGETTRACKMATRIXIVNVPROC) (GLenum target, GLuint address, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBDVNVPROC) (GLuint index, GLenum pname, GLdouble *params);
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBFVNVPROC) (GLuint index, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBIVNVPROC) (GLuint index, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBPOINTERVNVPROC) (GLuint index, GLenum pname, GLvoid* *pointer);
+typedef GLboolean (APIENTRYP PFNGLISPROGRAMNVPROC) (GLuint id);
+typedef void (APIENTRYP PFNGLLOADPROGRAMNVPROC) (GLenum target, GLuint id, GLsizei len, const GLubyte *program);
+typedef void (APIENTRYP PFNGLPROGRAMPARAMETER4DNVPROC) (GLenum target, GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w);
+typedef void (APIENTRYP PFNGLPROGRAMPARAMETER4DVNVPROC) (GLenum target, GLuint index, const GLdouble *v);
+typedef void (APIENTRYP PFNGLPROGRAMPARAMETER4FNVPROC) (GLenum target, GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w);
+typedef void (APIENTRYP PFNGLPROGRAMPARAMETER4FVNVPROC) (GLenum target, GLuint index, const GLfloat *v);
+typedef void (APIENTRYP PFNGLPROGRAMPARAMETERS4DVNVPROC) (GLenum target, GLuint index, GLuint count, const GLdouble *v);
+typedef void (APIENTRYP PFNGLPROGRAMPARAMETERS4FVNVPROC) (GLenum target, GLuint index, GLuint count, const GLfloat *v);
+typedef void (APIENTRYP PFNGLREQUESTRESIDENTPROGRAMSNVPROC) (GLsizei n, const GLuint *programs);
+typedef void (APIENTRYP PFNGLTRACKMATRIXNVPROC) (GLenum target, GLuint address, GLenum matrix, GLenum transform);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBPOINTERNVPROC) (GLuint index, GLint fsize, GLenum type, GLsizei stride, const GLvoid *pointer);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1DNVPROC) (GLuint index, GLdouble x);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1DVNVPROC) (GLuint index, const GLdouble *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1FNVPROC) (GLuint index, GLfloat x);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1FVNVPROC) (GLuint index, const GLfloat *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1SNVPROC) (GLuint index, GLshort x);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1SVNVPROC) (GLuint index, const GLshort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2DNVPROC) (GLuint index, GLdouble x, GLdouble y);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2DVNVPROC) (GLuint index, const GLdouble *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2FNVPROC) (GLuint index, GLfloat x, GLfloat y);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2FVNVPROC) (GLuint index, const GLfloat *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2SNVPROC) (GLuint index, GLshort x, GLshort y);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2SVNVPROC) (GLuint index, const GLshort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3DNVPROC) (GLuint index, GLdouble x, GLdouble y, GLdouble z);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3DVNVPROC) (GLuint index, const GLdouble *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3FNVPROC) (GLuint index, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3FVNVPROC) (GLuint index, const GLfloat *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3SNVPROC) (GLuint index, GLshort x, GLshort y, GLshort z);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3SVNVPROC) (GLuint index, const GLshort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4DNVPROC) (GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4DVNVPROC) (GLuint index, const GLdouble *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4FNVPROC) (GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4FVNVPROC) (GLuint index, const GLfloat *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4SNVPROC) (GLuint index, GLshort x, GLshort y, GLshort z, GLshort w);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4SVNVPROC) (GLuint index, const GLshort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4UBNVPROC) (GLuint index, GLubyte x, GLubyte y, GLubyte z, GLubyte w);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4UBVNVPROC) (GLuint index, const GLubyte *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBS1DVNVPROC) (GLuint index, GLsizei count, const GLdouble *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBS1FVNVPROC) (GLuint index, GLsizei count, const GLfloat *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBS1SVNVPROC) (GLuint index, GLsizei count, const GLshort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBS2DVNVPROC) (GLuint index, GLsizei count, const GLdouble *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBS2FVNVPROC) (GLuint index, GLsizei count, const GLfloat *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBS2SVNVPROC) (GLuint index, GLsizei count, const GLshort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBS3DVNVPROC) (GLuint index, GLsizei count, const GLdouble *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBS3FVNVPROC) (GLuint index, GLsizei count, const GLfloat *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBS3SVNVPROC) (GLuint index, GLsizei count, const GLshort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBS4DVNVPROC) (GLuint index, GLsizei count, const GLdouble *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBS4FVNVPROC) (GLuint index, GLsizei count, const GLfloat *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBS4SVNVPROC) (GLuint index, GLsizei count, const GLshort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBS4UBVNVPROC) (GLuint index, GLsizei count, const GLubyte *v);
+#endif
+
+#ifndef GL_SGIX_texture_coordinate_clamp
+#define GL_SGIX_texture_coordinate_clamp 1
+#endif
+
+#ifndef GL_SGIX_scalebias_hint
+#define GL_SGIX_scalebias_hint 1
+#endif
+
+#ifndef GL_OML_interlace
+#define GL_OML_interlace 1
+#endif
+
+#ifndef GL_OML_subsample
+#define GL_OML_subsample 1
+#endif
+
+#ifndef GL_OML_resample
+#define GL_OML_resample 1
+#endif
+
+#ifndef GL_NV_copy_depth_to_color
+#define GL_NV_copy_depth_to_color 1
+#endif
+
+#ifndef GL_ATI_envmap_bumpmap
+#define GL_ATI_envmap_bumpmap 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glTexBumpParameterivATI (GLenum, const GLint *);
+GLAPI void APIENTRY glTexBumpParameterfvATI (GLenum, const GLfloat *);
+GLAPI void APIENTRY glGetTexBumpParameterivATI (GLenum, GLint *);
+GLAPI void APIENTRY glGetTexBumpParameterfvATI (GLenum, GLfloat *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLTEXBUMPPARAMETERIVATIPROC) (GLenum pname, const GLint *param);
+typedef void (APIENTRYP PFNGLTEXBUMPPARAMETERFVATIPROC) (GLenum pname, const GLfloat *param);
+typedef void (APIENTRYP PFNGLGETTEXBUMPPARAMETERIVATIPROC) (GLenum pname, GLint *param);
+typedef void (APIENTRYP PFNGLGETTEXBUMPPARAMETERFVATIPROC) (GLenum pname, GLfloat *param);
+#endif
+
+#ifndef GL_ATI_fragment_shader
+#define GL_ATI_fragment_shader 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI GLuint APIENTRY glGenFragmentShadersATI (GLuint);
+GLAPI void APIENTRY glBindFragmentShaderATI (GLuint);
+GLAPI void APIENTRY glDeleteFragmentShaderATI (GLuint);
+GLAPI void APIENTRY glBeginFragmentShaderATI (void);
+GLAPI void APIENTRY glEndFragmentShaderATI (void);
+GLAPI void APIENTRY glPassTexCoordATI (GLuint, GLuint, GLenum);
+GLAPI void APIENTRY glSampleMapATI (GLuint, GLuint, GLenum);
+GLAPI void APIENTRY glColorFragmentOp1ATI (GLenum, GLuint, GLuint, GLuint, GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glColorFragmentOp2ATI (GLenum, GLuint, GLuint, GLuint, GLuint, GLuint, GLuint, GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glColorFragmentOp3ATI (GLenum, GLuint, GLuint, GLuint, GLuint, GLuint, GLuint, GLuint, GLuint, GLuint, GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glAlphaFragmentOp1ATI (GLenum, GLuint, GLuint, GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glAlphaFragmentOp2ATI (GLenum, GLuint, GLuint, GLuint, GLuint, GLuint, GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glAlphaFragmentOp3ATI (GLenum, GLuint, GLuint, GLuint, GLuint, GLuint, GLuint, GLuint, GLuint, GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glSetFragmentShaderConstantATI (GLuint, const GLfloat *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef GLuint (APIENTRYP PFNGLGENFRAGMENTSHADERSATIPROC) (GLuint range);
+typedef void (APIENTRYP PFNGLBINDFRAGMENTSHADERATIPROC) (GLuint id);
+typedef void (APIENTRYP PFNGLDELETEFRAGMENTSHADERATIPROC) (GLuint id);
+typedef void (APIENTRYP PFNGLBEGINFRAGMENTSHADERATIPROC) (void);
+typedef void (APIENTRYP PFNGLENDFRAGMENTSHADERATIPROC) (void);
+typedef void (APIENTRYP PFNGLPASSTEXCOORDATIPROC) (GLuint dst, GLuint coord, GLenum swizzle);
+typedef void (APIENTRYP PFNGLSAMPLEMAPATIPROC) (GLuint dst, GLuint interp, GLenum swizzle);
+typedef void (APIENTRYP PFNGLCOLORFRAGMENTOP1ATIPROC) (GLenum op, GLuint dst, GLuint dstMask, GLuint dstMod, GLuint arg1, GLuint arg1Rep, GLuint arg1Mod);
+typedef void (APIENTRYP PFNGLCOLORFRAGMENTOP2ATIPROC) (GLenum op, GLuint dst, GLuint dstMask, GLuint dstMod, GLuint arg1, GLuint arg1Rep, GLuint arg1Mod, GLuint arg2, GLuint arg2Rep, GLuint arg2Mod);
+typedef void (APIENTRYP PFNGLCOLORFRAGMENTOP3ATIPROC) (GLenum op, GLuint dst, GLuint dstMask, GLuint dstMod, GLuint arg1, GLuint arg1Rep, GLuint arg1Mod, GLuint arg2, GLuint arg2Rep, GLuint arg2Mod, GLuint arg3, GLuint arg3Rep, GLuint arg3Mod);
+typedef void (APIENTRYP PFNGLALPHAFRAGMENTOP1ATIPROC) (GLenum op, GLuint dst, GLuint dstMod, GLuint arg1, GLuint arg1Rep, GLuint arg1Mod);
+typedef void (APIENTRYP PFNGLALPHAFRAGMENTOP2ATIPROC) (GLenum op, GLuint dst, GLuint dstMod, GLuint arg1, GLuint arg1Rep, GLuint arg1Mod, GLuint arg2, GLuint arg2Rep, GLuint arg2Mod);
+typedef void (APIENTRYP PFNGLALPHAFRAGMENTOP3ATIPROC) (GLenum op, GLuint dst, GLuint dstMod, GLuint arg1, GLuint arg1Rep, GLuint arg1Mod, GLuint arg2, GLuint arg2Rep, GLuint arg2Mod, GLuint arg3, GLuint arg3Rep, GLuint arg3Mod);
+typedef void (APIENTRYP PFNGLSETFRAGMENTSHADERCONSTANTATIPROC) (GLuint dst, const GLfloat *value);
+#endif
+
+#ifndef GL_ATI_pn_triangles
+#define GL_ATI_pn_triangles 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glPNTrianglesiATI (GLenum, GLint);
+GLAPI void APIENTRY glPNTrianglesfATI (GLenum, GLfloat);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLPNTRIANGLESIATIPROC) (GLenum pname, GLint param);
+typedef void (APIENTRYP PFNGLPNTRIANGLESFATIPROC) (GLenum pname, GLfloat param);
+#endif
+
+#ifndef GL_ATI_vertex_array_object
+#define GL_ATI_vertex_array_object 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI GLuint APIENTRY glNewObjectBufferATI (GLsizei, const GLvoid *, GLenum);
+GLAPI GLboolean APIENTRY glIsObjectBufferATI (GLuint);
+GLAPI void APIENTRY glUpdateObjectBufferATI (GLuint, GLuint, GLsizei, const GLvoid *, GLenum);
+GLAPI void APIENTRY glGetObjectBufferfvATI (GLuint, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetObjectBufferivATI (GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glFreeObjectBufferATI (GLuint);
+GLAPI void APIENTRY glArrayObjectATI (GLenum, GLint, GLenum, GLsizei, GLuint, GLuint);
+GLAPI void APIENTRY glGetArrayObjectfvATI (GLenum, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetArrayObjectivATI (GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glVariantArrayObjectATI (GLuint, GLenum, GLsizei, GLuint, GLuint);
+GLAPI void APIENTRY glGetVariantArrayObjectfvATI (GLuint, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetVariantArrayObjectivATI (GLuint, GLenum, GLint *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef GLuint (APIENTRYP PFNGLNEWOBJECTBUFFERATIPROC) (GLsizei size, const GLvoid *pointer, GLenum usage);
+typedef GLboolean (APIENTRYP PFNGLISOBJECTBUFFERATIPROC) (GLuint buffer);
+typedef void (APIENTRYP PFNGLUPDATEOBJECTBUFFERATIPROC) (GLuint buffer, GLuint offset, GLsizei size, const GLvoid *pointer, GLenum preserve);
+typedef void (APIENTRYP PFNGLGETOBJECTBUFFERFVATIPROC) (GLuint buffer, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETOBJECTBUFFERIVATIPROC) (GLuint buffer, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLFREEOBJECTBUFFERATIPROC) (GLuint buffer);
+typedef void (APIENTRYP PFNGLARRAYOBJECTATIPROC) (GLenum array, GLint size, GLenum type, GLsizei stride, GLuint buffer, GLuint offset);
+typedef void (APIENTRYP PFNGLGETARRAYOBJECTFVATIPROC) (GLenum array, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETARRAYOBJECTIVATIPROC) (GLenum array, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLVARIANTARRAYOBJECTATIPROC) (GLuint id, GLenum type, GLsizei stride, GLuint buffer, GLuint offset);
+typedef void (APIENTRYP PFNGLGETVARIANTARRAYOBJECTFVATIPROC) (GLuint id, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETVARIANTARRAYOBJECTIVATIPROC) (GLuint id, GLenum pname, GLint *params);
+#endif
+
+#ifndef GL_EXT_vertex_shader
+#define GL_EXT_vertex_shader 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBeginVertexShaderEXT (void);
+GLAPI void APIENTRY glEndVertexShaderEXT (void);
+GLAPI void APIENTRY glBindVertexShaderEXT (GLuint);
+GLAPI GLuint APIENTRY glGenVertexShadersEXT (GLuint);
+GLAPI void APIENTRY glDeleteVertexShaderEXT (GLuint);
+GLAPI void APIENTRY glShaderOp1EXT (GLenum, GLuint, GLuint);
+GLAPI void APIENTRY glShaderOp2EXT (GLenum, GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glShaderOp3EXT (GLenum, GLuint, GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glSwizzleEXT (GLuint, GLuint, GLenum, GLenum, GLenum, GLenum);
+GLAPI void APIENTRY glWriteMaskEXT (GLuint, GLuint, GLenum, GLenum, GLenum, GLenum);
+GLAPI void APIENTRY glInsertComponentEXT (GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glExtractComponentEXT (GLuint, GLuint, GLuint);
+GLAPI GLuint APIENTRY glGenSymbolsEXT (GLenum, GLenum, GLenum, GLuint);
+GLAPI void APIENTRY glSetInvariantEXT (GLuint, GLenum, const GLvoid *);
+GLAPI void APIENTRY glSetLocalConstantEXT (GLuint, GLenum, const GLvoid *);
+GLAPI void APIENTRY glVariantbvEXT (GLuint, const GLbyte *);
+GLAPI void APIENTRY glVariantsvEXT (GLuint, const GLshort *);
+GLAPI void APIENTRY glVariantivEXT (GLuint, const GLint *);
+GLAPI void APIENTRY glVariantfvEXT (GLuint, const GLfloat *);
+GLAPI void APIENTRY glVariantdvEXT (GLuint, const GLdouble *);
+GLAPI void APIENTRY glVariantubvEXT (GLuint, const GLubyte *);
+GLAPI void APIENTRY glVariantusvEXT (GLuint, const GLushort *);
+GLAPI void APIENTRY glVariantuivEXT (GLuint, const GLuint *);
+GLAPI void APIENTRY glVariantPointerEXT (GLuint, GLenum, GLuint, const GLvoid *);
+GLAPI void APIENTRY glEnableVariantClientStateEXT (GLuint);
+GLAPI void APIENTRY glDisableVariantClientStateEXT (GLuint);
+GLAPI GLuint APIENTRY glBindLightParameterEXT (GLenum, GLenum);
+GLAPI GLuint APIENTRY glBindMaterialParameterEXT (GLenum, GLenum);
+GLAPI GLuint APIENTRY glBindTexGenParameterEXT (GLenum, GLenum, GLenum);
+GLAPI GLuint APIENTRY glBindTextureUnitParameterEXT (GLenum, GLenum);
+GLAPI GLuint APIENTRY glBindParameterEXT (GLenum);
+GLAPI GLboolean APIENTRY glIsVariantEnabledEXT (GLuint, GLenum);
+GLAPI void APIENTRY glGetVariantBooleanvEXT (GLuint, GLenum, GLboolean *);
+GLAPI void APIENTRY glGetVariantIntegervEXT (GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glGetVariantFloatvEXT (GLuint, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetVariantPointervEXT (GLuint, GLenum, GLvoid* *);
+GLAPI void APIENTRY glGetInvariantBooleanvEXT (GLuint, GLenum, GLboolean *);
+GLAPI void APIENTRY glGetInvariantIntegervEXT (GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glGetInvariantFloatvEXT (GLuint, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetLocalConstantBooleanvEXT (GLuint, GLenum, GLboolean *);
+GLAPI void APIENTRY glGetLocalConstantIntegervEXT (GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glGetLocalConstantFloatvEXT (GLuint, GLenum, GLfloat *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLBEGINVERTEXSHADEREXTPROC) (void);
+typedef void (APIENTRYP PFNGLENDVERTEXSHADEREXTPROC) (void);
+typedef void (APIENTRYP PFNGLBINDVERTEXSHADEREXTPROC) (GLuint id);
+typedef GLuint (APIENTRYP PFNGLGENVERTEXSHADERSEXTPROC) (GLuint range);
+typedef void (APIENTRYP PFNGLDELETEVERTEXSHADEREXTPROC) (GLuint id);
+typedef void (APIENTRYP PFNGLSHADEROP1EXTPROC) (GLenum op, GLuint res, GLuint arg1);
+typedef void (APIENTRYP PFNGLSHADEROP2EXTPROC) (GLenum op, GLuint res, GLuint arg1, GLuint arg2);
+typedef void (APIENTRYP PFNGLSHADEROP3EXTPROC) (GLenum op, GLuint res, GLuint arg1, GLuint arg2, GLuint arg3);
+typedef void (APIENTRYP PFNGLSWIZZLEEXTPROC) (GLuint res, GLuint in, GLenum outX, GLenum outY, GLenum outZ, GLenum outW);
+typedef void (APIENTRYP PFNGLWRITEMASKEXTPROC) (GLuint res, GLuint in, GLenum outX, GLenum outY, GLenum outZ, GLenum outW);
+typedef void (APIENTRYP PFNGLINSERTCOMPONENTEXTPROC) (GLuint res, GLuint src, GLuint num);
+typedef void (APIENTRYP PFNGLEXTRACTCOMPONENTEXTPROC) (GLuint res, GLuint src, GLuint num);
+typedef GLuint (APIENTRYP PFNGLGENSYMBOLSEXTPROC) (GLenum datatype, GLenum storagetype, GLenum range, GLuint components);
+typedef void (APIENTRYP PFNGLSETINVARIANTEXTPROC) (GLuint id, GLenum type, const GLvoid *addr);
+typedef void (APIENTRYP PFNGLSETLOCALCONSTANTEXTPROC) (GLuint id, GLenum type, const GLvoid *addr);
+typedef void (APIENTRYP PFNGLVARIANTBVEXTPROC) (GLuint id, const GLbyte *addr);
+typedef void (APIENTRYP PFNGLVARIANTSVEXTPROC) (GLuint id, const GLshort *addr);
+typedef void (APIENTRYP PFNGLVARIANTIVEXTPROC) (GLuint id, const GLint *addr);
+typedef void (APIENTRYP PFNGLVARIANTFVEXTPROC) (GLuint id, const GLfloat *addr);
+typedef void (APIENTRYP PFNGLVARIANTDVEXTPROC) (GLuint id, const GLdouble *addr);
+typedef void (APIENTRYP PFNGLVARIANTUBVEXTPROC) (GLuint id, const GLubyte *addr);
+typedef void (APIENTRYP PFNGLVARIANTUSVEXTPROC) (GLuint id, const GLushort *addr);
+typedef void (APIENTRYP PFNGLVARIANTUIVEXTPROC) (GLuint id, const GLuint *addr);
+typedef void (APIENTRYP PFNGLVARIANTPOINTEREXTPROC) (GLuint id, GLenum type, GLuint stride, const GLvoid *addr);
+typedef void (APIENTRYP PFNGLENABLEVARIANTCLIENTSTATEEXTPROC) (GLuint id);
+typedef void (APIENTRYP PFNGLDISABLEVARIANTCLIENTSTATEEXTPROC) (GLuint id);
+typedef GLuint (APIENTRYP PFNGLBINDLIGHTPARAMETEREXTPROC) (GLenum light, GLenum value);
+typedef GLuint (APIENTRYP PFNGLBINDMATERIALPARAMETEREXTPROC) (GLenum face, GLenum value);
+typedef GLuint (APIENTRYP PFNGLBINDTEXGENPARAMETEREXTPROC) (GLenum unit, GLenum coord, GLenum value);
+typedef GLuint (APIENTRYP PFNGLBINDTEXTUREUNITPARAMETEREXTPROC) (GLenum unit, GLenum value);
+typedef GLuint (APIENTRYP PFNGLBINDPARAMETEREXTPROC) (GLenum value);
+typedef GLboolean (APIENTRYP PFNGLISVARIANTENABLEDEXTPROC) (GLuint id, GLenum cap);
+typedef void (APIENTRYP PFNGLGETVARIANTBOOLEANVEXTPROC) (GLuint id, GLenum value, GLboolean *data);
+typedef void (APIENTRYP PFNGLGETVARIANTINTEGERVEXTPROC) (GLuint id, GLenum value, GLint *data);
+typedef void (APIENTRYP PFNGLGETVARIANTFLOATVEXTPROC) (GLuint id, GLenum value, GLfloat *data);
+typedef void (APIENTRYP PFNGLGETVARIANTPOINTERVEXTPROC) (GLuint id, GLenum value, GLvoid* *data);
+typedef void (APIENTRYP PFNGLGETINVARIANTBOOLEANVEXTPROC) (GLuint id, GLenum value, GLboolean *data);
+typedef void (APIENTRYP PFNGLGETINVARIANTINTEGERVEXTPROC) (GLuint id, GLenum value, GLint *data);
+typedef void (APIENTRYP PFNGLGETINVARIANTFLOATVEXTPROC) (GLuint id, GLenum value, GLfloat *data);
+typedef void (APIENTRYP PFNGLGETLOCALCONSTANTBOOLEANVEXTPROC) (GLuint id, GLenum value, GLboolean *data);
+typedef void (APIENTRYP PFNGLGETLOCALCONSTANTINTEGERVEXTPROC) (GLuint id, GLenum value, GLint *data);
+typedef void (APIENTRYP PFNGLGETLOCALCONSTANTFLOATVEXTPROC) (GLuint id, GLenum value, GLfloat *data);
+#endif
+
+#ifndef GL_ATI_vertex_streams
+#define GL_ATI_vertex_streams 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glVertexStream1sATI (GLenum, GLshort);
+GLAPI void APIENTRY glVertexStream1svATI (GLenum, const GLshort *);
+GLAPI void APIENTRY glVertexStream1iATI (GLenum, GLint);
+GLAPI void APIENTRY glVertexStream1ivATI (GLenum, const GLint *);
+GLAPI void APIENTRY glVertexStream1fATI (GLenum, GLfloat);
+GLAPI void APIENTRY glVertexStream1fvATI (GLenum, const GLfloat *);
+GLAPI void APIENTRY glVertexStream1dATI (GLenum, GLdouble);
+GLAPI void APIENTRY glVertexStream1dvATI (GLenum, const GLdouble *);
+GLAPI void APIENTRY glVertexStream2sATI (GLenum, GLshort, GLshort);
+GLAPI void APIENTRY glVertexStream2svATI (GLenum, const GLshort *);
+GLAPI void APIENTRY glVertexStream2iATI (GLenum, GLint, GLint);
+GLAPI void APIENTRY glVertexStream2ivATI (GLenum, const GLint *);
+GLAPI void APIENTRY glVertexStream2fATI (GLenum, GLfloat, GLfloat);
+GLAPI void APIENTRY glVertexStream2fvATI (GLenum, const GLfloat *);
+GLAPI void APIENTRY glVertexStream2dATI (GLenum, GLdouble, GLdouble);
+GLAPI void APIENTRY glVertexStream2dvATI (GLenum, const GLdouble *);
+GLAPI void APIENTRY glVertexStream3sATI (GLenum, GLshort, GLshort, GLshort);
+GLAPI void APIENTRY glVertexStream3svATI (GLenum, const GLshort *);
+GLAPI void APIENTRY glVertexStream3iATI (GLenum, GLint, GLint, GLint);
+GLAPI void APIENTRY glVertexStream3ivATI (GLenum, const GLint *);
+GLAPI void APIENTRY glVertexStream3fATI (GLenum, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glVertexStream3fvATI (GLenum, const GLfloat *);
+GLAPI void APIENTRY glVertexStream3dATI (GLenum, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glVertexStream3dvATI (GLenum, const GLdouble *);
+GLAPI void APIENTRY glVertexStream4sATI (GLenum, GLshort, GLshort, GLshort, GLshort);
+GLAPI void APIENTRY glVertexStream4svATI (GLenum, const GLshort *);
+GLAPI void APIENTRY glVertexStream4iATI (GLenum, GLint, GLint, GLint, GLint);
+GLAPI void APIENTRY glVertexStream4ivATI (GLenum, const GLint *);
+GLAPI void APIENTRY glVertexStream4fATI (GLenum, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glVertexStream4fvATI (GLenum, const GLfloat *);
+GLAPI void APIENTRY glVertexStream4dATI (GLenum, GLdouble, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glVertexStream4dvATI (GLenum, const GLdouble *);
+GLAPI void APIENTRY glNormalStream3bATI (GLenum, GLbyte, GLbyte, GLbyte);
+GLAPI void APIENTRY glNormalStream3bvATI (GLenum, const GLbyte *);
+GLAPI void APIENTRY glNormalStream3sATI (GLenum, GLshort, GLshort, GLshort);
+GLAPI void APIENTRY glNormalStream3svATI (GLenum, const GLshort *);
+GLAPI void APIENTRY glNormalStream3iATI (GLenum, GLint, GLint, GLint);
+GLAPI void APIENTRY glNormalStream3ivATI (GLenum, const GLint *);
+GLAPI void APIENTRY glNormalStream3fATI (GLenum, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glNormalStream3fvATI (GLenum, const GLfloat *);
+GLAPI void APIENTRY glNormalStream3dATI (GLenum, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glNormalStream3dvATI (GLenum, const GLdouble *);
+GLAPI void APIENTRY glClientActiveVertexStreamATI (GLenum);
+GLAPI void APIENTRY glVertexBlendEnviATI (GLenum, GLint);
+GLAPI void APIENTRY glVertexBlendEnvfATI (GLenum, GLfloat);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLVERTEXSTREAM1SATIPROC) (GLenum stream, GLshort x);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM1SVATIPROC) (GLenum stream, const GLshort *coords);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM1IATIPROC) (GLenum stream, GLint x);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM1IVATIPROC) (GLenum stream, const GLint *coords);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM1FATIPROC) (GLenum stream, GLfloat x);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM1FVATIPROC) (GLenum stream, const GLfloat *coords);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM1DATIPROC) (GLenum stream, GLdouble x);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM1DVATIPROC) (GLenum stream, const GLdouble *coords);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM2SATIPROC) (GLenum stream, GLshort x, GLshort y);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM2SVATIPROC) (GLenum stream, const GLshort *coords);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM2IATIPROC) (GLenum stream, GLint x, GLint y);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM2IVATIPROC) (GLenum stream, const GLint *coords);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM2FATIPROC) (GLenum stream, GLfloat x, GLfloat y);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM2FVATIPROC) (GLenum stream, const GLfloat *coords);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM2DATIPROC) (GLenum stream, GLdouble x, GLdouble y);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM2DVATIPROC) (GLenum stream, const GLdouble *coords);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM3SATIPROC) (GLenum stream, GLshort x, GLshort y, GLshort z);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM3SVATIPROC) (GLenum stream, const GLshort *coords);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM3IATIPROC) (GLenum stream, GLint x, GLint y, GLint z);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM3IVATIPROC) (GLenum stream, const GLint *coords);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM3FATIPROC) (GLenum stream, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM3FVATIPROC) (GLenum stream, const GLfloat *coords);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM3DATIPROC) (GLenum stream, GLdouble x, GLdouble y, GLdouble z);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM3DVATIPROC) (GLenum stream, const GLdouble *coords);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM4SATIPROC) (GLenum stream, GLshort x, GLshort y, GLshort z, GLshort w);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM4SVATIPROC) (GLenum stream, const GLshort *coords);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM4IATIPROC) (GLenum stream, GLint x, GLint y, GLint z, GLint w);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM4IVATIPROC) (GLenum stream, const GLint *coords);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM4FATIPROC) (GLenum stream, GLfloat x, GLfloat y, GLfloat z, GLfloat w);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM4FVATIPROC) (GLenum stream, const GLfloat *coords);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM4DATIPROC) (GLenum stream, GLdouble x, GLdouble y, GLdouble z, GLdouble w);
+typedef void (APIENTRYP PFNGLVERTEXSTREAM4DVATIPROC) (GLenum stream, const GLdouble *coords);
+typedef void (APIENTRYP PFNGLNORMALSTREAM3BATIPROC) (GLenum stream, GLbyte nx, GLbyte ny, GLbyte nz);
+typedef void (APIENTRYP PFNGLNORMALSTREAM3BVATIPROC) (GLenum stream, const GLbyte *coords);
+typedef void (APIENTRYP PFNGLNORMALSTREAM3SATIPROC) (GLenum stream, GLshort nx, GLshort ny, GLshort nz);
+typedef void (APIENTRYP PFNGLNORMALSTREAM3SVATIPROC) (GLenum stream, const GLshort *coords);
+typedef void (APIENTRYP PFNGLNORMALSTREAM3IATIPROC) (GLenum stream, GLint nx, GLint ny, GLint nz);
+typedef void (APIENTRYP PFNGLNORMALSTREAM3IVATIPROC) (GLenum stream, const GLint *coords);
+typedef void (APIENTRYP PFNGLNORMALSTREAM3FATIPROC) (GLenum stream, GLfloat nx, GLfloat ny, GLfloat nz);
+typedef void (APIENTRYP PFNGLNORMALSTREAM3FVATIPROC) (GLenum stream, const GLfloat *coords);
+typedef void (APIENTRYP PFNGLNORMALSTREAM3DATIPROC) (GLenum stream, GLdouble nx, GLdouble ny, GLdouble nz);
+typedef void (APIENTRYP PFNGLNORMALSTREAM3DVATIPROC) (GLenum stream, const GLdouble *coords);
+typedef void (APIENTRYP PFNGLCLIENTACTIVEVERTEXSTREAMATIPROC) (GLenum stream);
+typedef void (APIENTRYP PFNGLVERTEXBLENDENVIATIPROC) (GLenum pname, GLint param);
+typedef void (APIENTRYP PFNGLVERTEXBLENDENVFATIPROC) (GLenum pname, GLfloat param);
+#endif
+
+#ifndef GL_ATI_element_array
+#define GL_ATI_element_array 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glElementPointerATI (GLenum, const GLvoid *);
+GLAPI void APIENTRY glDrawElementArrayATI (GLenum, GLsizei);
+GLAPI void APIENTRY glDrawRangeElementArrayATI (GLenum, GLuint, GLuint, GLsizei);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLELEMENTPOINTERATIPROC) (GLenum type, const GLvoid *pointer);
+typedef void (APIENTRYP PFNGLDRAWELEMENTARRAYATIPROC) (GLenum mode, GLsizei count);
+typedef void (APIENTRYP PFNGLDRAWRANGEELEMENTARRAYATIPROC) (GLenum mode, GLuint start, GLuint end, GLsizei count);
+#endif
+
+#ifndef GL_SUN_mesh_array
+#define GL_SUN_mesh_array 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glDrawMeshArraysSUN (GLenum, GLint, GLsizei, GLsizei);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLDRAWMESHARRAYSSUNPROC) (GLenum mode, GLint first, GLsizei count, GLsizei width);
+#endif
+
+#ifndef GL_SUN_slice_accum
+#define GL_SUN_slice_accum 1
+#endif
+
+#ifndef GL_NV_multisample_filter_hint
+#define GL_NV_multisample_filter_hint 1
+#endif
+
+#ifndef GL_NV_depth_clamp
+#define GL_NV_depth_clamp 1
+#endif
+
+#ifndef GL_NV_occlusion_query
+#define GL_NV_occlusion_query 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glGenOcclusionQueriesNV (GLsizei, GLuint *);
+GLAPI void APIENTRY glDeleteOcclusionQueriesNV (GLsizei, const GLuint *);
+GLAPI GLboolean APIENTRY glIsOcclusionQueryNV (GLuint);
+GLAPI void APIENTRY glBeginOcclusionQueryNV (GLuint);
+GLAPI void APIENTRY glEndOcclusionQueryNV (void);
+GLAPI void APIENTRY glGetOcclusionQueryivNV (GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glGetOcclusionQueryuivNV (GLuint, GLenum, GLuint *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLGENOCCLUSIONQUERIESNVPROC) (GLsizei n, GLuint *ids);
+typedef void (APIENTRYP PFNGLDELETEOCCLUSIONQUERIESNVPROC) (GLsizei n, const GLuint *ids);
+typedef GLboolean (APIENTRYP PFNGLISOCCLUSIONQUERYNVPROC) (GLuint id);
+typedef void (APIENTRYP PFNGLBEGINOCCLUSIONQUERYNVPROC) (GLuint id);
+typedef void (APIENTRYP PFNGLENDOCCLUSIONQUERYNVPROC) (void);
+typedef void (APIENTRYP PFNGLGETOCCLUSIONQUERYIVNVPROC) (GLuint id, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETOCCLUSIONQUERYUIVNVPROC) (GLuint id, GLenum pname, GLuint *params);
+#endif
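
The gen/begin/end/get pattern above is the whole of the NV occlusion query API. The following is an editorial sketch, not part of the imported header: it assumes GL_GLEXT_PROTOTYPES is defined before inclusion, a current context whose extension string advertises GL_NV_occlusion_query, and a hypothetical draw_occludee() callback supplied by the caller.

#define GL_GLEXT_PROTOTYPES
#include <GL/gl.h>
#include <GL/glext.h>

/* Count the samples that pass the depth test while draw_occludee() runs.
 * GL_PIXEL_COUNT_NV is defined in the enum section of this header; the
 * final get blocks until the query result is available. */
static GLuint count_visible_samples(void (*draw_occludee)(void))
{
    GLuint query, samples = 0;

    glGenOcclusionQueriesNV(1, &query);
    glBeginOcclusionQueryNV(query);
    draw_occludee();                 /* geometry whose visibility is tested */
    glEndOcclusionQueryNV();

    glGetOcclusionQueryuivNV(query, GL_PIXEL_COUNT_NV, &samples);
    glDeleteOcclusionQueriesNV(1, &query);
    return samples;
}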
+
+#ifndef GL_NV_point_sprite
+#define GL_NV_point_sprite 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glPointParameteriNV (GLenum, GLint);
+GLAPI void APIENTRY glPointParameterivNV (GLenum, const GLint *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLPOINTPARAMETERINVPROC) (GLenum pname, GLint param);
+typedef void (APIENTRYP PFNGLPOINTPARAMETERIVNVPROC) (GLenum pname, const GLint *params);
+#endif
+
+#ifndef GL_NV_texture_shader3
+#define GL_NV_texture_shader3 1
+#endif
+
+#ifndef GL_NV_vertex_program1_1
+#define GL_NV_vertex_program1_1 1
+#endif
+
+#ifndef GL_EXT_shadow_funcs
+#define GL_EXT_shadow_funcs 1
+#endif
+
+#ifndef GL_EXT_stencil_two_side
+#define GL_EXT_stencil_two_side 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glActiveStencilFaceEXT (GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLACTIVESTENCILFACEEXTPROC) (GLenum face);
+#endif
+
+#ifndef GL_ATI_text_fragment_shader
+#define GL_ATI_text_fragment_shader 1
+#endif
+
+#ifndef GL_APPLE_client_storage
+#define GL_APPLE_client_storage 1
+#endif
+
+#ifndef GL_APPLE_element_array
+#define GL_APPLE_element_array 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glElementPointerAPPLE (GLenum, const GLvoid *);
+GLAPI void APIENTRY glDrawElementArrayAPPLE (GLenum, GLint, GLsizei);
+GLAPI void APIENTRY glDrawRangeElementArrayAPPLE (GLenum, GLuint, GLuint, GLint, GLsizei);
+GLAPI void APIENTRY glMultiDrawElementArrayAPPLE (GLenum, const GLint *, const GLsizei *, GLsizei);
+GLAPI void APIENTRY glMultiDrawRangeElementArrayAPPLE (GLenum, GLuint, GLuint, const GLint *, const GLsizei *, GLsizei);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLELEMENTPOINTERAPPLEPROC) (GLenum type, const GLvoid *pointer);
+typedef void (APIENTRYP PFNGLDRAWELEMENTARRAYAPPLEPROC) (GLenum mode, GLint first, GLsizei count);
+typedef void (APIENTRYP PFNGLDRAWRANGEELEMENTARRAYAPPLEPROC) (GLenum mode, GLuint start, GLuint end, GLint first, GLsizei count);
+typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTARRAYAPPLEPROC) (GLenum mode, const GLint *first, const GLsizei *count, GLsizei primcount);
+typedef void (APIENTRYP PFNGLMULTIDRAWRANGEELEMENTARRAYAPPLEPROC) (GLenum mode, GLuint start, GLuint end, const GLint *first, const GLsizei *count, GLsizei primcount);
+#endif
+
+#ifndef GL_APPLE_fence
+#define GL_APPLE_fence 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glGenFencesAPPLE (GLsizei, GLuint *);
+GLAPI void APIENTRY glDeleteFencesAPPLE (GLsizei, const GLuint *);
+GLAPI void APIENTRY glSetFenceAPPLE (GLuint);
+GLAPI GLboolean APIENTRY glIsFenceAPPLE (GLuint);
+GLAPI GLboolean APIENTRY glTestFenceAPPLE (GLuint);
+GLAPI void APIENTRY glFinishFenceAPPLE (GLuint);
+GLAPI GLboolean APIENTRY glTestObjectAPPLE (GLenum, GLuint);
+GLAPI void APIENTRY glFinishObjectAPPLE (GLenum, GLint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLGENFENCESAPPLEPROC) (GLsizei n, GLuint *fences);
+typedef void (APIENTRYP PFNGLDELETEFENCESAPPLEPROC) (GLsizei n, const GLuint *fences);
+typedef void (APIENTRYP PFNGLSETFENCEAPPLEPROC) (GLuint fence);
+typedef GLboolean (APIENTRYP PFNGLISFENCEAPPLEPROC) (GLuint fence);
+typedef GLboolean (APIENTRYP PFNGLTESTFENCEAPPLEPROC) (GLuint fence);
+typedef void (APIENTRYP PFNGLFINISHFENCEAPPLEPROC) (GLuint fence);
+typedef GLboolean (APIENTRYP PFNGLTESTOBJECTAPPLEPROC) (GLenum object, GLuint name);
+typedef void (APIENTRYP PFNGLFINISHOBJECTAPPLEPROC) (GLenum object, GLint name);
+#endif
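
As with the NV queries, the APPLE fence entry points follow a gen/set/test/finish pattern. A minimal sketch (editorial, not from the header), assuming a current context that exports GL_APPLE_fence:

#define GL_GLEXT_PROTOTYPES
#include <GL/gl.h>
#include <GL/glext.h>

/* Insert a fence behind queued commands, then wait for it to drain. */
static void wait_for_queued_work(void)
{
    GLuint fence;

    glGenFencesAPPLE(1, &fence);
    /* ... drawing commands submitted here ... */
    glSetFenceAPPLE(fence);          /* completes when prior commands finish */

    if (!glTestFenceAPPLE(fence))    /* non-blocking poll */
        glFinishFenceAPPLE(fence);   /* block until the fence is reached */

    glDeleteFencesAPPLE(1, &fence);
}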
+
+#ifndef GL_APPLE_vertex_array_object
+#define GL_APPLE_vertex_array_object 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBindVertexArrayAPPLE (GLuint);
+GLAPI void APIENTRY glDeleteVertexArraysAPPLE (GLsizei, const GLuint *);
+GLAPI void APIENTRY glGenVertexArraysAPPLE (GLsizei, GLuint *);
+GLAPI GLboolean APIENTRY glIsVertexArrayAPPLE (GLuint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLBINDVERTEXARRAYAPPLEPROC) (GLuint array);
+typedef void (APIENTRYP PFNGLDELETEVERTEXARRAYSAPPLEPROC) (GLsizei n, const GLuint *arrays);
+typedef void (APIENTRYP PFNGLGENVERTEXARRAYSAPPLEPROC) (GLsizei n, GLuint *arrays);
+typedef GLboolean (APIENTRYP PFNGLISVERTEXARRAYAPPLEPROC) (GLuint array);
+#endif
+
+#ifndef GL_APPLE_vertex_array_range
+#define GL_APPLE_vertex_array_range 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glVertexArrayRangeAPPLE (GLsizei, GLvoid *);
+GLAPI void APIENTRY glFlushVertexArrayRangeAPPLE (GLsizei, GLvoid *);
+GLAPI void APIENTRY glVertexArrayParameteriAPPLE (GLenum, GLint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLVERTEXARRAYRANGEAPPLEPROC) (GLsizei length, GLvoid *pointer);
+typedef void (APIENTRYP PFNGLFLUSHVERTEXARRAYRANGEAPPLEPROC) (GLsizei length, GLvoid *pointer);
+typedef void (APIENTRYP PFNGLVERTEXARRAYPARAMETERIAPPLEPROC) (GLenum pname, GLint param);
+#endif
+
+#ifndef GL_APPLE_ycbcr_422
+#define GL_APPLE_ycbcr_422 1
+#endif
+
+#ifndef GL_S3_s3tc
+#define GL_S3_s3tc 1
+#endif
+
+#ifndef GL_ATI_draw_buffers
+#define GL_ATI_draw_buffers 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glDrawBuffersATI (GLsizei, const GLenum *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLDRAWBUFFERSATIPROC) (GLsizei n, const GLenum *bufs);
+#endif
+
+#ifndef GL_ATI_pixel_format_float
+#define GL_ATI_pixel_format_float 1
+/* This is really a WGL extension, but defines some associated GL enums.
+ * ATI does not export "GL_ATI_pixel_format_float" in the GL_EXTENSIONS string.
+ */
+#endif
+
+#ifndef GL_ATI_texture_env_combine3
+#define GL_ATI_texture_env_combine3 1
+#endif
+
+#ifndef GL_ATI_texture_float
+#define GL_ATI_texture_float 1
+#endif
+
+#ifndef GL_NV_float_buffer
+#define GL_NV_float_buffer 1
+#endif
+
+#ifndef GL_NV_fragment_program
+#define GL_NV_fragment_program 1
+/* Some NV_fragment_program entry points are shared with ARB_vertex_program. */
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glProgramNamedParameter4fNV (GLuint, GLsizei, const GLubyte *, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glProgramNamedParameter4dNV (GLuint, GLsizei, const GLubyte *, GLdouble, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glProgramNamedParameter4fvNV (GLuint, GLsizei, const GLubyte *, const GLfloat *);
+GLAPI void APIENTRY glProgramNamedParameter4dvNV (GLuint, GLsizei, const GLubyte *, const GLdouble *);
+GLAPI void APIENTRY glGetProgramNamedParameterfvNV (GLuint, GLsizei, const GLubyte *, GLfloat *);
+GLAPI void APIENTRY glGetProgramNamedParameterdvNV (GLuint, GLsizei, const GLubyte *, GLdouble *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLPROGRAMNAMEDPARAMETER4FNVPROC) (GLuint id, GLsizei len, const GLubyte *name, GLfloat x, GLfloat y, GLfloat z, GLfloat w);
+typedef void (APIENTRYP PFNGLPROGRAMNAMEDPARAMETER4DNVPROC) (GLuint id, GLsizei len, const GLubyte *name, GLdouble x, GLdouble y, GLdouble z, GLdouble w);
+typedef void (APIENTRYP PFNGLPROGRAMNAMEDPARAMETER4FVNVPROC) (GLuint id, GLsizei len, const GLubyte *name, const GLfloat *v);
+typedef void (APIENTRYP PFNGLPROGRAMNAMEDPARAMETER4DVNVPROC) (GLuint id, GLsizei len, const GLubyte *name, const GLdouble *v);
+typedef void (APIENTRYP PFNGLGETPROGRAMNAMEDPARAMETERFVNVPROC) (GLuint id, GLsizei len, const GLubyte *name, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETPROGRAMNAMEDPARAMETERDVNVPROC) (GLuint id, GLsizei len, const GLubyte *name, GLdouble *params);
+#endif
+
+#ifndef GL_NV_half_float
+#define GL_NV_half_float 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glVertex2hNV (GLhalfNV, GLhalfNV);
+GLAPI void APIENTRY glVertex2hvNV (const GLhalfNV *);
+GLAPI void APIENTRY glVertex3hNV (GLhalfNV, GLhalfNV, GLhalfNV);
+GLAPI void APIENTRY glVertex3hvNV (const GLhalfNV *);
+GLAPI void APIENTRY glVertex4hNV (GLhalfNV, GLhalfNV, GLhalfNV, GLhalfNV);
+GLAPI void APIENTRY glVertex4hvNV (const GLhalfNV *);
+GLAPI void APIENTRY glNormal3hNV (GLhalfNV, GLhalfNV, GLhalfNV);
+GLAPI void APIENTRY glNormal3hvNV (const GLhalfNV *);
+GLAPI void APIENTRY glColor3hNV (GLhalfNV, GLhalfNV, GLhalfNV);
+GLAPI void APIENTRY glColor3hvNV (const GLhalfNV *);
+GLAPI void APIENTRY glColor4hNV (GLhalfNV, GLhalfNV, GLhalfNV, GLhalfNV);
+GLAPI void APIENTRY glColor4hvNV (const GLhalfNV *);
+GLAPI void APIENTRY glTexCoord1hNV (GLhalfNV);
+GLAPI void APIENTRY glTexCoord1hvNV (const GLhalfNV *);
+GLAPI void APIENTRY glTexCoord2hNV (GLhalfNV, GLhalfNV);
+GLAPI void APIENTRY glTexCoord2hvNV (const GLhalfNV *);
+GLAPI void APIENTRY glTexCoord3hNV (GLhalfNV, GLhalfNV, GLhalfNV);
+GLAPI void APIENTRY glTexCoord3hvNV (const GLhalfNV *);
+GLAPI void APIENTRY glTexCoord4hNV (GLhalfNV, GLhalfNV, GLhalfNV, GLhalfNV);
+GLAPI void APIENTRY glTexCoord4hvNV (const GLhalfNV *);
+GLAPI void APIENTRY glMultiTexCoord1hNV (GLenum, GLhalfNV);
+GLAPI void APIENTRY glMultiTexCoord1hvNV (GLenum, const GLhalfNV *);
+GLAPI void APIENTRY glMultiTexCoord2hNV (GLenum, GLhalfNV, GLhalfNV);
+GLAPI void APIENTRY glMultiTexCoord2hvNV (GLenum, const GLhalfNV *);
+GLAPI void APIENTRY glMultiTexCoord3hNV (GLenum, GLhalfNV, GLhalfNV, GLhalfNV);
+GLAPI void APIENTRY glMultiTexCoord3hvNV (GLenum, const GLhalfNV *);
+GLAPI void APIENTRY glMultiTexCoord4hNV (GLenum, GLhalfNV, GLhalfNV, GLhalfNV, GLhalfNV);
+GLAPI void APIENTRY glMultiTexCoord4hvNV (GLenum, const GLhalfNV *);
+GLAPI void APIENTRY glFogCoordhNV (GLhalfNV);
+GLAPI void APIENTRY glFogCoordhvNV (const GLhalfNV *);
+GLAPI void APIENTRY glSecondaryColor3hNV (GLhalfNV, GLhalfNV, GLhalfNV);
+GLAPI void APIENTRY glSecondaryColor3hvNV (const GLhalfNV *);
+GLAPI void APIENTRY glVertexWeighthNV (GLhalfNV);
+GLAPI void APIENTRY glVertexWeighthvNV (const GLhalfNV *);
+GLAPI void APIENTRY glVertexAttrib1hNV (GLuint, GLhalfNV);
+GLAPI void APIENTRY glVertexAttrib1hvNV (GLuint, const GLhalfNV *);
+GLAPI void APIENTRY glVertexAttrib2hNV (GLuint, GLhalfNV, GLhalfNV);
+GLAPI void APIENTRY glVertexAttrib2hvNV (GLuint, const GLhalfNV *);
+GLAPI void APIENTRY glVertexAttrib3hNV (GLuint, GLhalfNV, GLhalfNV, GLhalfNV);
+GLAPI void APIENTRY glVertexAttrib3hvNV (GLuint, const GLhalfNV *);
+GLAPI void APIENTRY glVertexAttrib4hNV (GLuint, GLhalfNV, GLhalfNV, GLhalfNV, GLhalfNV);
+GLAPI void APIENTRY glVertexAttrib4hvNV (GLuint, const GLhalfNV *);
+GLAPI void APIENTRY glVertexAttribs1hvNV (GLuint, GLsizei, const GLhalfNV *);
+GLAPI void APIENTRY glVertexAttribs2hvNV (GLuint, GLsizei, const GLhalfNV *);
+GLAPI void APIENTRY glVertexAttribs3hvNV (GLuint, GLsizei, const GLhalfNV *);
+GLAPI void APIENTRY glVertexAttribs4hvNV (GLuint, GLsizei, const GLhalfNV *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLVERTEX2HNVPROC) (GLhalfNV x, GLhalfNV y);
+typedef void (APIENTRYP PFNGLVERTEX2HVNVPROC) (const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLVERTEX3HNVPROC) (GLhalfNV x, GLhalfNV y, GLhalfNV z);
+typedef void (APIENTRYP PFNGLVERTEX3HVNVPROC) (const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLVERTEX4HNVPROC) (GLhalfNV x, GLhalfNV y, GLhalfNV z, GLhalfNV w);
+typedef void (APIENTRYP PFNGLVERTEX4HVNVPROC) (const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLNORMAL3HNVPROC) (GLhalfNV nx, GLhalfNV ny, GLhalfNV nz);
+typedef void (APIENTRYP PFNGLNORMAL3HVNVPROC) (const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLCOLOR3HNVPROC) (GLhalfNV red, GLhalfNV green, GLhalfNV blue);
+typedef void (APIENTRYP PFNGLCOLOR3HVNVPROC) (const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLCOLOR4HNVPROC) (GLhalfNV red, GLhalfNV green, GLhalfNV blue, GLhalfNV alpha);
+typedef void (APIENTRYP PFNGLCOLOR4HVNVPROC) (const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLTEXCOORD1HNVPROC) (GLhalfNV s);
+typedef void (APIENTRYP PFNGLTEXCOORD1HVNVPROC) (const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLTEXCOORD2HNVPROC) (GLhalfNV s, GLhalfNV t);
+typedef void (APIENTRYP PFNGLTEXCOORD2HVNVPROC) (const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLTEXCOORD3HNVPROC) (GLhalfNV s, GLhalfNV t, GLhalfNV r);
+typedef void (APIENTRYP PFNGLTEXCOORD3HVNVPROC) (const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLTEXCOORD4HNVPROC) (GLhalfNV s, GLhalfNV t, GLhalfNV r, GLhalfNV q);
+typedef void (APIENTRYP PFNGLTEXCOORD4HVNVPROC) (const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1HNVPROC) (GLenum target, GLhalfNV s);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1HVNVPROC) (GLenum target, const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2HNVPROC) (GLenum target, GLhalfNV s, GLhalfNV t);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2HVNVPROC) (GLenum target, const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3HNVPROC) (GLenum target, GLhalfNV s, GLhalfNV t, GLhalfNV r);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3HVNVPROC) (GLenum target, const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4HNVPROC) (GLenum target, GLhalfNV s, GLhalfNV t, GLhalfNV r, GLhalfNV q);
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4HVNVPROC) (GLenum target, const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLFOGCOORDHNVPROC) (GLhalfNV fog);
+typedef void (APIENTRYP PFNGLFOGCOORDHVNVPROC) (const GLhalfNV *fog);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3HNVPROC) (GLhalfNV red, GLhalfNV green, GLhalfNV blue);
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3HVNVPROC) (const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLVERTEXWEIGHTHNVPROC) (GLhalfNV weight);
+typedef void (APIENTRYP PFNGLVERTEXWEIGHTHVNVPROC) (const GLhalfNV *weight);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1HNVPROC) (GLuint index, GLhalfNV x);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1HVNVPROC) (GLuint index, const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2HNVPROC) (GLuint index, GLhalfNV x, GLhalfNV y);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2HVNVPROC) (GLuint index, const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3HNVPROC) (GLuint index, GLhalfNV x, GLhalfNV y, GLhalfNV z);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3HVNVPROC) (GLuint index, const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4HNVPROC) (GLuint index, GLhalfNV x, GLhalfNV y, GLhalfNV z, GLhalfNV w);
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4HVNVPROC) (GLuint index, const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBS1HVNVPROC) (GLuint index, GLsizei n, const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBS2HVNVPROC) (GLuint index, GLsizei n, const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBS3HVNVPROC) (GLuint index, GLsizei n, const GLhalfNV *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBS4HVNVPROC) (GLuint index, GLsizei n, const GLhalfNV *v);
+#endif
+
+#ifndef GL_NV_pixel_data_range
+#define GL_NV_pixel_data_range 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glPixelDataRangeNV (GLenum, GLsizei, GLvoid *);
+GLAPI void APIENTRY glFlushPixelDataRangeNV (GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLPIXELDATARANGENVPROC) (GLenum target, GLsizei length, GLvoid *pointer);
+typedef void (APIENTRYP PFNGLFLUSHPIXELDATARANGENVPROC) (GLenum target);
+#endif
+
+#ifndef GL_NV_primitive_restart
+#define GL_NV_primitive_restart 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glPrimitiveRestartNV (void);
+GLAPI void APIENTRY glPrimitiveRestartIndexNV (GLuint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLPRIMITIVERESTARTNVPROC) (void);
+typedef void (APIENTRYP PFNGLPRIMITIVERESTARTINDEXNVPROC) (GLuint index);
+#endif
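
glPrimitiveRestartIndexNV lets one index array hold several strips separated by a sentinel index. An illustrative sketch (not part of the header), assuming client-side vertex arrays are already configured:

#define GL_GLEXT_PROTOTYPES
#include <GL/gl.h>
#include <GL/glext.h>

/* Draw two triangle strips from one index array by restarting the
 * primitive at the sentinel value 0xFFFF. */
static void draw_two_strips(void)
{
    static const GLushort indices[] = {
        0, 1, 2, 3,      /* first strip      */
        0xFFFF,          /* restart sentinel */
        4, 5, 6, 7       /* second strip     */
    };

    glEnableClientState(GL_PRIMITIVE_RESTART_NV);
    glPrimitiveRestartIndexNV(0xFFFF);
    glDrawElements(GL_TRIANGLE_STRIP, 9, GL_UNSIGNED_SHORT, indices);
    glDisableClientState(GL_PRIMITIVE_RESTART_NV);
}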
+
+#ifndef GL_NV_texture_expand_normal
+#define GL_NV_texture_expand_normal 1
+#endif
+
+#ifndef GL_NV_vertex_program2
+#define GL_NV_vertex_program2 1
+#endif
+
+#ifndef GL_ATI_map_object_buffer
+#define GL_ATI_map_object_buffer 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI GLvoid* APIENTRY glMapObjectBufferATI (GLuint);
+GLAPI void APIENTRY glUnmapObjectBufferATI (GLuint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef GLvoid* (APIENTRYP PFNGLMAPOBJECTBUFFERATIPROC) (GLuint buffer);
+typedef void (APIENTRYP PFNGLUNMAPOBJECTBUFFERATIPROC) (GLuint buffer);
+#endif
+
+#ifndef GL_ATI_separate_stencil
+#define GL_ATI_separate_stencil 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glStencilOpSeparateATI (GLenum, GLenum, GLenum, GLenum);
+GLAPI void APIENTRY glStencilFuncSeparateATI (GLenum, GLenum, GLint, GLuint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLSTENCILOPSEPARATEATIPROC) (GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass);
+typedef void (APIENTRYP PFNGLSTENCILFUNCSEPARATEATIPROC) (GLenum frontfunc, GLenum backfunc, GLint ref, GLuint mask);
+#endif
+
+#ifndef GL_ATI_vertex_attrib_array_object
+#define GL_ATI_vertex_attrib_array_object 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glVertexAttribArrayObjectATI (GLuint, GLint, GLenum, GLboolean, GLsizei, GLuint, GLuint);
+GLAPI void APIENTRY glGetVertexAttribArrayObjectfvATI (GLuint, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetVertexAttribArrayObjectivATI (GLuint, GLenum, GLint *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLVERTEXATTRIBARRAYOBJECTATIPROC) (GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, GLuint buffer, GLuint offset);
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBARRAYOBJECTFVATIPROC) (GLuint index, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBARRAYOBJECTIVATIPROC) (GLuint index, GLenum pname, GLint *params);
+#endif
+
+#ifndef GL_OES_read_format
+#define GL_OES_read_format 1
+#endif
+
+#ifndef GL_EXT_depth_bounds_test
+#define GL_EXT_depth_bounds_test 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glDepthBoundsEXT (GLclampd, GLclampd);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLDEPTHBOUNDSEXTPROC) (GLclampd zmin, GLclampd zmax);
+#endif
+
+#ifndef GL_EXT_texture_mirror_clamp
+#define GL_EXT_texture_mirror_clamp 1
+#endif
+
+#ifndef GL_EXT_blend_equation_separate
+#define GL_EXT_blend_equation_separate 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBlendEquationSeparateEXT (GLenum, GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLBLENDEQUATIONSEPARATEEXTPROC) (GLenum modeRGB, GLenum modeAlpha);
+#endif
+
+#ifndef GL_MESA_pack_invert
+#define GL_MESA_pack_invert 1
+#endif
+
+#ifndef GL_MESA_ycbcr_texture
+#define GL_MESA_ycbcr_texture 1
+#endif
+
+#ifndef GL_EXT_pixel_buffer_object
+#define GL_EXT_pixel_buffer_object 1
+#endif
+
+#ifndef GL_NV_fragment_program_option
+#define GL_NV_fragment_program_option 1
+#endif
+
+#ifndef GL_NV_fragment_program2
+#define GL_NV_fragment_program2 1
+#endif
+
+#ifndef GL_NV_vertex_program2_option
+#define GL_NV_vertex_program2_option 1
+#endif
+
+#ifndef GL_NV_vertex_program3
+#define GL_NV_vertex_program3 1
+#endif
+
+#ifndef GL_EXT_framebuffer_object
+#define GL_EXT_framebuffer_object 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI GLboolean APIENTRY glIsRenderbufferEXT (GLuint);
+GLAPI void APIENTRY glBindRenderbufferEXT (GLenum, GLuint);
+GLAPI void APIENTRY glDeleteRenderbuffersEXT (GLsizei, const GLuint *);
+GLAPI void APIENTRY glGenRenderbuffersEXT (GLsizei, GLuint *);
+GLAPI void APIENTRY glRenderbufferStorageEXT (GLenum, GLenum, GLsizei, GLsizei);
+GLAPI void APIENTRY glGetRenderbufferParameterivEXT (GLenum, GLenum, GLint *);
+GLAPI GLboolean APIENTRY glIsFramebufferEXT (GLuint);
+GLAPI void APIENTRY glBindFramebufferEXT (GLenum, GLuint);
+GLAPI void APIENTRY glDeleteFramebuffersEXT (GLsizei, const GLuint *);
+GLAPI void APIENTRY glGenFramebuffersEXT (GLsizei, GLuint *);
+GLAPI GLenum APIENTRY glCheckFramebufferStatusEXT (GLenum);
+GLAPI void APIENTRY glFramebufferTexture1DEXT (GLenum, GLenum, GLenum, GLuint, GLint);
+GLAPI void APIENTRY glFramebufferTexture2DEXT (GLenum, GLenum, GLenum, GLuint, GLint);
+GLAPI void APIENTRY glFramebufferTexture3DEXT (GLenum, GLenum, GLenum, GLuint, GLint, GLint);
+GLAPI void APIENTRY glFramebufferRenderbufferEXT (GLenum, GLenum, GLenum, GLuint);
+GLAPI void APIENTRY glGetFramebufferAttachmentParameterivEXT (GLenum, GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGenerateMipmapEXT (GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef GLboolean (APIENTRYP PFNGLISRENDERBUFFEREXTPROC) (GLuint renderbuffer);
+typedef void (APIENTRYP PFNGLBINDRENDERBUFFEREXTPROC) (GLenum target, GLuint renderbuffer);
+typedef void (APIENTRYP PFNGLDELETERENDERBUFFERSEXTPROC) (GLsizei n, const GLuint *renderbuffers);
+typedef void (APIENTRYP PFNGLGENRENDERBUFFERSEXTPROC) (GLsizei n, GLuint *renderbuffers);
+typedef void (APIENTRYP PFNGLRENDERBUFFERSTORAGEEXTPROC) (GLenum target, GLenum internalformat, GLsizei width, GLsizei height);
+typedef void (APIENTRYP PFNGLGETRENDERBUFFERPARAMETERIVEXTPROC) (GLenum target, GLenum pname, GLint *params);
+typedef GLboolean (APIENTRYP PFNGLISFRAMEBUFFEREXTPROC) (GLuint framebuffer);
+typedef void (APIENTRYP PFNGLBINDFRAMEBUFFEREXTPROC) (GLenum target, GLuint framebuffer);
+typedef void (APIENTRYP PFNGLDELETEFRAMEBUFFERSEXTPROC) (GLsizei n, const GLuint *framebuffers);
+typedef void (APIENTRYP PFNGLGENFRAMEBUFFERSEXTPROC) (GLsizei n, GLuint *framebuffers);
+typedef GLenum (APIENTRYP PFNGLCHECKFRAMEBUFFERSTATUSEXTPROC) (GLenum target);
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE1DEXTPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE2DEXTPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE3DEXTPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLint zoffset);
+typedef void (APIENTRYP PFNGLFRAMEBUFFERRENDERBUFFEREXTPROC) (GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer);
+typedef void (APIENTRYP PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVEXTPROC) (GLenum target, GLenum attachment, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGENERATEMIPMAPEXTPROC) (GLenum target);
+#endif
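
EXT_framebuffer_object is the largest block in this stretch of the header; typical use is gen, bind, attach, then verify completeness. A minimal sketch (editorial, not from the header), assuming color_tex is an existing 2D texture with a level-0 image:

#define GL_GLEXT_PROTOTYPES
#include <GL/gl.h>
#include <GL/glext.h>

/* Build a color-only render target around an existing texture. */
static GLuint make_fbo(GLuint color_tex)
{
    GLuint fbo;

    glGenFramebuffersEXT(1, &fbo);
    glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fbo);
    glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT,
                              GL_TEXTURE_2D, color_tex, 0);

    if (glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) !=
        GL_FRAMEBUFFER_COMPLETE_EXT) {
        glDeleteFramebuffersEXT(1, &fbo);   /* incomplete: give up */
        fbo = 0;
    }
    glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);  /* back to the window framebuffer */
    return fbo;
}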
+
+#ifndef GL_GREMEDY_string_marker
+#define GL_GREMEDY_string_marker 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glStringMarkerGREMEDY (GLsizei, const GLvoid *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLSTRINGMARKERGREMEDYPROC) (GLsizei len, const GLvoid *string);
+#endif
+
+#ifndef GL_EXT_packed_depth_stencil
+#define GL_EXT_packed_depth_stencil 1
+#endif
+
+#ifndef GL_EXT_stencil_clear_tag
+#define GL_EXT_stencil_clear_tag 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glStencilClearTagEXT (GLsizei, GLuint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLSTENCILCLEARTAGEXTPROC) (GLsizei stencilTagBits, GLuint stencilClearTag);
+#endif
+
+#ifndef GL_EXT_texture_sRGB
+#define GL_EXT_texture_sRGB 1
+#endif
+
+#ifndef GL_EXT_framebuffer_blit
+#define GL_EXT_framebuffer_blit 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBlitFramebufferEXT (GLint, GLint, GLint, GLint, GLint, GLint, GLint, GLint, GLbitfield, GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLBLITFRAMEBUFFEREXTPROC) (GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter);
+#endif
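
glBlitFramebufferEXT copies a rectangle between the read and draw framebuffers selected with the GL_READ_FRAMEBUFFER_EXT / GL_DRAW_FRAMEBUFFER_EXT targets this extension adds. Sketch only, assuming src_fbo and dst_fbo are complete FBOs of the given size:

#define GL_GLEXT_PROTOTYPES
#include <GL/gl.h>
#include <GL/glext.h>

static void copy_color(GLuint src_fbo, GLuint dst_fbo, GLint w, GLint h)
{
    glBindFramebufferEXT(GL_READ_FRAMEBUFFER_EXT, src_fbo);
    glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, dst_fbo);
    glBlitFramebufferEXT(0, 0, w, h,     /* source rectangle      */
                         0, 0, w, h,     /* destination rectangle */
                         GL_COLOR_BUFFER_BIT, GL_NEAREST);
    glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
}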
+
+#ifndef GL_EXT_framebuffer_multisample
+#define GL_EXT_framebuffer_multisample 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glRenderbufferStorageMultisampleEXT (GLenum, GLsizei, GLenum, GLsizei, GLsizei);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height);
+#endif
+
+#ifndef GL_MESAX_texture_stack
+#define GL_MESAX_texture_stack 1
+#endif
+
+#ifndef GL_EXT_timer_query
+#define GL_EXT_timer_query 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glGetQueryObjecti64vEXT (GLuint, GLenum, GLint64EXT *);
+GLAPI void APIENTRY glGetQueryObjectui64vEXT (GLuint, GLenum, GLuint64EXT *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLGETQUERYOBJECTI64VEXTPROC) (GLuint id, GLenum pname, GLint64EXT *params);
+typedef void (APIENTRYP PFNGLGETQUERYOBJECTUI64VEXTPROC) (GLuint id, GLenum pname, GLuint64EXT *params);
+#endif
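
EXT_timer_query only adds the two 64-bit getters above; the query itself is issued through the core GL 1.5 query API with the GL_TIME_ELAPSED_EXT target. A sketch under those assumptions (current GL 1.5+ context, extension supported, hypothetical submit() callback):

#define GL_GLEXT_PROTOTYPES
#include <GL/gl.h>
#include <GL/glext.h>

/* Return the GPU time, in nanoseconds, spent on the commands issued by submit(). */
static GLuint64EXT time_block(void (*submit)(void))
{
    GLuint q;
    GLuint64EXT ns = 0;

    glGenQueries(1, &q);
    glBeginQuery(GL_TIME_ELAPSED_EXT, q);
    submit();
    glEndQuery(GL_TIME_ELAPSED_EXT);

    glGetQueryObjectui64vEXT(q, GL_QUERY_RESULT, &ns);  /* blocks until ready */
    glDeleteQueries(1, &q);
    return ns;
}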
+
+#ifndef GL_EXT_gpu_program_parameters
+#define GL_EXT_gpu_program_parameters 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glProgramEnvParameters4fvEXT (GLenum, GLuint, GLsizei, const GLfloat *);
+GLAPI void APIENTRY glProgramLocalParameters4fvEXT (GLenum, GLuint, GLsizei, const GLfloat *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLPROGRAMENVPARAMETERS4FVEXTPROC) (GLenum target, GLuint index, GLsizei count, const GLfloat *params);
+typedef void (APIENTRYP PFNGLPROGRAMLOCALPARAMETERS4FVEXTPROC) (GLenum target, GLuint index, GLsizei count, const GLfloat *params);
+#endif
+
+#ifndef GL_APPLE_flush_buffer_range
+#define GL_APPLE_flush_buffer_range 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBufferParameteriAPPLE (GLenum, GLenum, GLint);
+GLAPI void APIENTRY glFlushMappedBufferRangeAPPLE (GLenum, GLintptr, GLsizeiptr);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLBUFFERPARAMETERIAPPLEPROC) (GLenum target, GLenum pname, GLint param);
+typedef void (APIENTRYP PFNGLFLUSHMAPPEDBUFFERRANGEAPPLEPROC) (GLenum target, GLintptr offset, GLsizeiptr size);
+#endif
+
+#ifndef GL_NV_gpu_program4
+#define GL_NV_gpu_program4 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glProgramLocalParameterI4iNV (GLenum, GLuint, GLint, GLint, GLint, GLint);
+GLAPI void APIENTRY glProgramLocalParameterI4ivNV (GLenum, GLuint, const GLint *);
+GLAPI void APIENTRY glProgramLocalParametersI4ivNV (GLenum, GLuint, GLsizei, const GLint *);
+GLAPI void APIENTRY glProgramLocalParameterI4uiNV (GLenum, GLuint, GLuint, GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glProgramLocalParameterI4uivNV (GLenum, GLuint, const GLuint *);
+GLAPI void APIENTRY glProgramLocalParametersI4uivNV (GLenum, GLuint, GLsizei, const GLuint *);
+GLAPI void APIENTRY glProgramEnvParameterI4iNV (GLenum, GLuint, GLint, GLint, GLint, GLint);
+GLAPI void APIENTRY glProgramEnvParameterI4ivNV (GLenum, GLuint, const GLint *);
+GLAPI void APIENTRY glProgramEnvParametersI4ivNV (GLenum, GLuint, GLsizei, const GLint *);
+GLAPI void APIENTRY glProgramEnvParameterI4uiNV (GLenum, GLuint, GLuint, GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glProgramEnvParameterI4uivNV (GLenum, GLuint, const GLuint *);
+GLAPI void APIENTRY glProgramEnvParametersI4uivNV (GLenum, GLuint, GLsizei, const GLuint *);
+GLAPI void APIENTRY glGetProgramLocalParameterIivNV (GLenum, GLuint, GLint *);
+GLAPI void APIENTRY glGetProgramLocalParameterIuivNV (GLenum, GLuint, GLuint *);
+GLAPI void APIENTRY glGetProgramEnvParameterIivNV (GLenum, GLuint, GLint *);
+GLAPI void APIENTRY glGetProgramEnvParameterIuivNV (GLenum, GLuint, GLuint *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLPROGRAMLOCALPARAMETERI4INVPROC) (GLenum target, GLuint index, GLint x, GLint y, GLint z, GLint w);
+typedef void (APIENTRYP PFNGLPROGRAMLOCALPARAMETERI4IVNVPROC) (GLenum target, GLuint index, const GLint *params);
+typedef void (APIENTRYP PFNGLPROGRAMLOCALPARAMETERSI4IVNVPROC) (GLenum target, GLuint index, GLsizei count, const GLint *params);
+typedef void (APIENTRYP PFNGLPROGRAMLOCALPARAMETERI4UINVPROC) (GLenum target, GLuint index, GLuint x, GLuint y, GLuint z, GLuint w);
+typedef void (APIENTRYP PFNGLPROGRAMLOCALPARAMETERI4UIVNVPROC) (GLenum target, GLuint index, const GLuint *params);
+typedef void (APIENTRYP PFNGLPROGRAMLOCALPARAMETERSI4UIVNVPROC) (GLenum target, GLuint index, GLsizei count, const GLuint *params);
+typedef void (APIENTRYP PFNGLPROGRAMENVPARAMETERI4INVPROC) (GLenum target, GLuint index, GLint x, GLint y, GLint z, GLint w);
+typedef void (APIENTRYP PFNGLPROGRAMENVPARAMETERI4IVNVPROC) (GLenum target, GLuint index, const GLint *params);
+typedef void (APIENTRYP PFNGLPROGRAMENVPARAMETERSI4IVNVPROC) (GLenum target, GLuint index, GLsizei count, const GLint *params);
+typedef void (APIENTRYP PFNGLPROGRAMENVPARAMETERI4UINVPROC) (GLenum target, GLuint index, GLuint x, GLuint y, GLuint z, GLuint w);
+typedef void (APIENTRYP PFNGLPROGRAMENVPARAMETERI4UIVNVPROC) (GLenum target, GLuint index, const GLuint *params);
+typedef void (APIENTRYP PFNGLPROGRAMENVPARAMETERSI4UIVNVPROC) (GLenum target, GLuint index, GLsizei count, const GLuint *params);
+typedef void (APIENTRYP PFNGLGETPROGRAMLOCALPARAMETERIIVNVPROC) (GLenum target, GLuint index, GLint *params);
+typedef void (APIENTRYP PFNGLGETPROGRAMLOCALPARAMETERIUIVNVPROC) (GLenum target, GLuint index, GLuint *params);
+typedef void (APIENTRYP PFNGLGETPROGRAMENVPARAMETERIIVNVPROC) (GLenum target, GLuint index, GLint *params);
+typedef void (APIENTRYP PFNGLGETPROGRAMENVPARAMETERIUIVNVPROC) (GLenum target, GLuint index, GLuint *params);
+#endif
+
+#ifndef GL_NV_geometry_program4
+#define GL_NV_geometry_program4 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glProgramVertexLimitNV (GLenum, GLint);
+GLAPI void APIENTRY glFramebufferTextureEXT (GLenum, GLenum, GLuint, GLint);
+GLAPI void APIENTRY glFramebufferTextureLayerEXT (GLenum, GLenum, GLuint, GLint, GLint);
+GLAPI void APIENTRY glFramebufferTextureFaceEXT (GLenum, GLenum, GLuint, GLint, GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLPROGRAMVERTEXLIMITNVPROC) (GLenum target, GLint limit);
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTUREEXTPROC) (GLenum target, GLenum attachment, GLuint texture, GLint level);
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURELAYEREXTPROC) (GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer);
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTUREFACEEXTPROC) (GLenum target, GLenum attachment, GLuint texture, GLint level, GLenum face);
+#endif
+
+#ifndef GL_EXT_geometry_shader4
+#define GL_EXT_geometry_shader4 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glProgramParameteriEXT (GLuint, GLenum, GLint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLPROGRAMPARAMETERIEXTPROC) (GLuint program, GLenum pname, GLint value);
+#endif
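
The single glProgramParameteriEXT entry point configures a geometry shader's input topology, output topology, and maximum emitted vertices; the values take effect when the program is linked. Illustrative sketch (not from the header), assuming prog is a GL 2.0 program object with a GL_GEOMETRY_SHADER_EXT shader attached:

#define GL_GLEXT_PROTOTYPES
#include <GL/gl.h>
#include <GL/glext.h>

static void setup_geometry_program(GLuint prog)
{
    glProgramParameteriEXT(prog, GL_GEOMETRY_INPUT_TYPE_EXT,   GL_TRIANGLES);
    glProgramParameteriEXT(prog, GL_GEOMETRY_OUTPUT_TYPE_EXT,  GL_TRIANGLE_STRIP);
    glProgramParameteriEXT(prog, GL_GEOMETRY_VERTICES_OUT_EXT, 3);
    glLinkProgram(prog);   /* the parameters are latched at link time */
}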
+
+#ifndef GL_NV_vertex_program4
+#define GL_NV_vertex_program4 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glVertexAttribI1iEXT (GLuint, GLint);
+GLAPI void APIENTRY glVertexAttribI2iEXT (GLuint, GLint, GLint);
+GLAPI void APIENTRY glVertexAttribI3iEXT (GLuint, GLint, GLint, GLint);
+GLAPI void APIENTRY glVertexAttribI4iEXT (GLuint, GLint, GLint, GLint, GLint);
+GLAPI void APIENTRY glVertexAttribI1uiEXT (GLuint, GLuint);
+GLAPI void APIENTRY glVertexAttribI2uiEXT (GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glVertexAttribI3uiEXT (GLuint, GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glVertexAttribI4uiEXT (GLuint, GLuint, GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glVertexAttribI1ivEXT (GLuint, const GLint *);
+GLAPI void APIENTRY glVertexAttribI2ivEXT (GLuint, const GLint *);
+GLAPI void APIENTRY glVertexAttribI3ivEXT (GLuint, const GLint *);
+GLAPI void APIENTRY glVertexAttribI4ivEXT (GLuint, const GLint *);
+GLAPI void APIENTRY glVertexAttribI1uivEXT (GLuint, const GLuint *);
+GLAPI void APIENTRY glVertexAttribI2uivEXT (GLuint, const GLuint *);
+GLAPI void APIENTRY glVertexAttribI3uivEXT (GLuint, const GLuint *);
+GLAPI void APIENTRY glVertexAttribI4uivEXT (GLuint, const GLuint *);
+GLAPI void APIENTRY glVertexAttribI4bvEXT (GLuint, const GLbyte *);
+GLAPI void APIENTRY glVertexAttribI4svEXT (GLuint, const GLshort *);
+GLAPI void APIENTRY glVertexAttribI4ubvEXT (GLuint, const GLubyte *);
+GLAPI void APIENTRY glVertexAttribI4usvEXT (GLuint, const GLushort *);
+GLAPI void APIENTRY glVertexAttribIPointerEXT (GLuint, GLint, GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glGetVertexAttribIivEXT (GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glGetVertexAttribIuivEXT (GLuint, GLenum, GLuint *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI1IEXTPROC) (GLuint index, GLint x);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI2IEXTPROC) (GLuint index, GLint x, GLint y);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI3IEXTPROC) (GLuint index, GLint x, GLint y, GLint z);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4IEXTPROC) (GLuint index, GLint x, GLint y, GLint z, GLint w);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI1UIEXTPROC) (GLuint index, GLuint x);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI2UIEXTPROC) (GLuint index, GLuint x, GLuint y);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI3UIEXTPROC) (GLuint index, GLuint x, GLuint y, GLuint z);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4UIEXTPROC) (GLuint index, GLuint x, GLuint y, GLuint z, GLuint w);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI1IVEXTPROC) (GLuint index, const GLint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI2IVEXTPROC) (GLuint index, const GLint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI3IVEXTPROC) (GLuint index, const GLint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4IVEXTPROC) (GLuint index, const GLint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI1UIVEXTPROC) (GLuint index, const GLuint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI2UIVEXTPROC) (GLuint index, const GLuint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI3UIVEXTPROC) (GLuint index, const GLuint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4UIVEXTPROC) (GLuint index, const GLuint *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4BVEXTPROC) (GLuint index, const GLbyte *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4SVEXTPROC) (GLuint index, const GLshort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4UBVEXTPROC) (GLuint index, const GLubyte *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4USVEXTPROC) (GLuint index, const GLushort *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBIPOINTEREXTPROC) (GLuint index, GLint size, GLenum type, GLsizei stride, const GLvoid *pointer);
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBIIVEXTPROC) (GLuint index, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBIUIVEXTPROC) (GLuint index, GLenum pname, GLuint *params);
+#endif
+
+#ifndef GL_EXT_gpu_shader4
+#define GL_EXT_gpu_shader4 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glGetUniformuivEXT (GLuint, GLint, GLuint *);
+GLAPI void APIENTRY glBindFragDataLocationEXT (GLuint, GLuint, const GLchar *);
+GLAPI GLint APIENTRY glGetFragDataLocationEXT (GLuint, const GLchar *);
+GLAPI void APIENTRY glUniform1uiEXT (GLint, GLuint);
+GLAPI void APIENTRY glUniform2uiEXT (GLint, GLuint, GLuint);
+GLAPI void APIENTRY glUniform3uiEXT (GLint, GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glUniform4uiEXT (GLint, GLuint, GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glUniform1uivEXT (GLint, GLsizei, const GLuint *);
+GLAPI void APIENTRY glUniform2uivEXT (GLint, GLsizei, const GLuint *);
+GLAPI void APIENTRY glUniform3uivEXT (GLint, GLsizei, const GLuint *);
+GLAPI void APIENTRY glUniform4uivEXT (GLint, GLsizei, const GLuint *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLGETUNIFORMUIVEXTPROC) (GLuint program, GLint location, GLuint *params);
+typedef void (APIENTRYP PFNGLBINDFRAGDATALOCATIONEXTPROC) (GLuint program, GLuint color, const GLchar *name);
+typedef GLint (APIENTRYP PFNGLGETFRAGDATALOCATIONEXTPROC) (GLuint program, const GLchar *name);
+typedef void (APIENTRYP PFNGLUNIFORM1UIEXTPROC) (GLint location, GLuint v0);
+typedef void (APIENTRYP PFNGLUNIFORM2UIEXTPROC) (GLint location, GLuint v0, GLuint v1);
+typedef void (APIENTRYP PFNGLUNIFORM3UIEXTPROC) (GLint location, GLuint v0, GLuint v1, GLuint v2);
+typedef void (APIENTRYP PFNGLUNIFORM4UIEXTPROC) (GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+typedef void (APIENTRYP PFNGLUNIFORM1UIVEXTPROC) (GLint location, GLsizei count, const GLuint *value);
+typedef void (APIENTRYP PFNGLUNIFORM2UIVEXTPROC) (GLint location, GLsizei count, const GLuint *value);
+typedef void (APIENTRYP PFNGLUNIFORM3UIVEXTPROC) (GLint location, GLsizei count, const GLuint *value);
+typedef void (APIENTRYP PFNGLUNIFORM4UIVEXTPROC) (GLint location, GLsizei count, const GLuint *value);
+#endif
+
+#ifndef GL_EXT_draw_instanced
+#define GL_EXT_draw_instanced 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glDrawArraysInstancedEXT (GLenum, GLint, GLsizei, GLsizei);
+GLAPI void APIENTRY glDrawElementsInstancedEXT (GLenum, GLsizei, GLenum, const GLvoid *, GLsizei);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLDRAWARRAYSINSTANCEDEXTPROC) (GLenum mode, GLint start, GLsizei count, GLsizei primcount);
+typedef void (APIENTRYP PFNGLDRAWELEMENTSINSTANCEDEXTPROC) (GLenum mode, GLsizei count, GLenum type, const GLvoid *indices, GLsizei primcount);
+#endif
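
glDrawElementsInstancedEXT repeats one indexed draw primcount times; a shader can distinguish the copies through gl_InstanceID from GL_EXT_gpu_shader4 above. Sketch only, assuming vertex attributes and a GL_ELEMENT_ARRAY_BUFFER are already bound:

#define GL_GLEXT_PROTOTYPES
#include <GL/gl.h>
#include <GL/glext.h>

static void draw_instanced_mesh(GLsizei index_count, GLsizei copies)
{
    /* A null indices pointer means "offset 0 into the bound index buffer". */
    glDrawElementsInstancedEXT(GL_TRIANGLES, index_count,
                               GL_UNSIGNED_INT, (const GLvoid *)0, copies);
}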
+
+#ifndef GL_EXT_packed_float
+#define GL_EXT_packed_float 1
+#endif
+
+#ifndef GL_EXT_texture_array
+#define GL_EXT_texture_array 1
+#endif
+
+#ifndef GL_EXT_texture_buffer_object
+#define GL_EXT_texture_buffer_object 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glTexBufferEXT (GLenum, GLenum, GLuint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLTEXBUFFEREXTPROC) (GLenum target, GLenum internalformat, GLuint buffer);
+#endif
+
+#ifndef GL_EXT_texture_compression_latc
+#define GL_EXT_texture_compression_latc 1
+#endif
+
+#ifndef GL_EXT_texture_compression_rgtc
+#define GL_EXT_texture_compression_rgtc 1
+#endif
+
+#ifndef GL_EXT_texture_shared_exponent
+#define GL_EXT_texture_shared_exponent 1
+#endif
+
+#ifndef GL_NV_depth_buffer_float
+#define GL_NV_depth_buffer_float 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glDepthRangedNV (GLdouble, GLdouble);
+GLAPI void APIENTRY glClearDepthdNV (GLdouble);
+GLAPI void APIENTRY glDepthBoundsdNV (GLdouble, GLdouble);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLDEPTHRANGEDNVPROC) (GLdouble zNear, GLdouble zFar);
+typedef void (APIENTRYP PFNGLCLEARDEPTHDNVPROC) (GLdouble depth);
+typedef void (APIENTRYP PFNGLDEPTHBOUNDSDNVPROC) (GLdouble zmin, GLdouble zmax);
+#endif
+
+#ifndef GL_NV_fragment_program4
+#define GL_NV_fragment_program4 1
+#endif
+
+#ifndef GL_NV_framebuffer_multisample_coverage
+#define GL_NV_framebuffer_multisample_coverage 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glRenderbufferStorageMultisampleCoverageNV (GLenum, GLsizei, GLsizei, GLenum, GLsizei, GLsizei);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLECOVERAGENVPROC) (GLenum target, GLsizei coverageSamples, GLsizei colorSamples, GLenum internalformat, GLsizei width, GLsizei height);
+#endif
+
+#ifndef GL_EXT_framebuffer_sRGB
+#define GL_EXT_framebuffer_sRGB 1
+#endif
+
+#ifndef GL_NV_geometry_shader4
+#define GL_NV_geometry_shader4 1
+#endif
+
+#ifndef GL_NV_parameter_buffer_object
+#define GL_NV_parameter_buffer_object 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glProgramBufferParametersfvNV (GLenum, GLuint, GLuint, GLsizei, const GLfloat *);
+GLAPI void APIENTRY glProgramBufferParametersIivNV (GLenum, GLuint, GLuint, GLsizei, const GLint *);
+GLAPI void APIENTRY glProgramBufferParametersIuivNV (GLenum, GLuint, GLuint, GLsizei, const GLuint *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLPROGRAMBUFFERPARAMETERSFVNVPROC) (GLenum target, GLuint buffer, GLuint index, GLsizei count, const GLfloat *params);
+typedef void (APIENTRYP PFNGLPROGRAMBUFFERPARAMETERSIIVNVPROC) (GLenum target, GLuint buffer, GLuint index, GLsizei count, const GLint *params);
+typedef void (APIENTRYP PFNGLPROGRAMBUFFERPARAMETERSIUIVNVPROC) (GLenum target, GLuint buffer, GLuint index, GLsizei count, const GLuint *params);
+#endif
+
+#ifndef GL_EXT_draw_buffers2
+#define GL_EXT_draw_buffers2 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glColorMaskIndexedEXT (GLuint, GLboolean, GLboolean, GLboolean, GLboolean);
+GLAPI void APIENTRY glGetBooleanIndexedvEXT (GLenum, GLuint, GLboolean *);
+GLAPI void APIENTRY glGetIntegerIndexedvEXT (GLenum, GLuint, GLint *);
+GLAPI void APIENTRY glEnableIndexedEXT (GLenum, GLuint);
+GLAPI void APIENTRY glDisableIndexedEXT (GLenum, GLuint);
+GLAPI GLboolean APIENTRY glIsEnabledIndexedEXT (GLenum, GLuint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLCOLORMASKINDEXEDEXTPROC) (GLuint index, GLboolean r, GLboolean g, GLboolean b, GLboolean a);
+typedef void (APIENTRYP PFNGLGETBOOLEANINDEXEDVEXTPROC) (GLenum target, GLuint index, GLboolean *data);
+typedef void (APIENTRYP PFNGLGETINTEGERINDEXEDVEXTPROC) (GLenum target, GLuint index, GLint *data);
+typedef void (APIENTRYP PFNGLENABLEINDEXEDEXTPROC) (GLenum target, GLuint index);
+typedef void (APIENTRYP PFNGLDISABLEINDEXEDEXTPROC) (GLenum target, GLuint index);
+typedef GLboolean (APIENTRYP PFNGLISENABLEDINDEXEDEXTPROC) (GLenum target, GLuint index);
+#endif
+
+#ifndef GL_NV_transform_feedback
+#define GL_NV_transform_feedback 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBeginTransformFeedbackNV (GLenum);
+GLAPI void APIENTRY glEndTransformFeedbackNV (void);
+GLAPI void APIENTRY glTransformFeedbackAttribsNV (GLuint, const GLint *, GLenum);
+GLAPI void APIENTRY glBindBufferRangeNV (GLenum, GLuint, GLuint, GLintptr, GLsizeiptr);
+GLAPI void APIENTRY glBindBufferOffsetNV (GLenum, GLuint, GLuint, GLintptr);
+GLAPI void APIENTRY glBindBufferBaseNV (GLenum, GLuint, GLuint);
+GLAPI void APIENTRY glTransformFeedbackVaryingsNV (GLuint, GLsizei, const GLint *, GLenum);
+GLAPI void APIENTRY glActiveVaryingNV (GLuint, const GLchar *);
+GLAPI GLint APIENTRY glGetVaryingLocationNV (GLuint, const GLchar *);
+GLAPI void APIENTRY glGetActiveVaryingNV (GLuint, GLuint, GLsizei, GLsizei *, GLsizei *, GLenum *, GLchar *);
+GLAPI void APIENTRY glGetTransformFeedbackVaryingNV (GLuint, GLuint, GLint *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLBEGINTRANSFORMFEEDBACKNVPROC) (GLenum primitiveMode);
+typedef void (APIENTRYP PFNGLENDTRANSFORMFEEDBACKNVPROC) (void);
+typedef void (APIENTRYP PFNGLTRANSFORMFEEDBACKATTRIBSNVPROC) (GLuint count, const GLint *attribs, GLenum bufferMode);
+typedef void (APIENTRYP PFNGLBINDBUFFERRANGENVPROC) (GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size);
+typedef void (APIENTRYP PFNGLBINDBUFFEROFFSETNVPROC) (GLenum target, GLuint index, GLuint buffer, GLintptr offset);
+typedef void (APIENTRYP PFNGLBINDBUFFERBASENVPROC) (GLenum target, GLuint index, GLuint buffer);
+typedef void (APIENTRYP PFNGLTRANSFORMFEEDBACKVARYINGSNVPROC) (GLuint program, GLsizei count, const GLint *locations, GLenum bufferMode);
+typedef void (APIENTRYP PFNGLACTIVEVARYINGNVPROC) (GLuint program, const GLchar *name);
+typedef GLint (APIENTRYP PFNGLGETVARYINGLOCATIONNVPROC) (GLuint program, const GLchar *name);
+typedef void (APIENTRYP PFNGLGETACTIVEVARYINGNVPROC) (GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLsizei *size, GLenum *type, GLchar *name);
+typedef void (APIENTRYP PFNGLGETTRANSFORMFEEDBACKVARYINGNVPROC) (GLuint program, GLuint index, GLint *location);
+#endif
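
Unlike the EXT variant later in the diff, NV_transform_feedback selects varyings by location on an already linked program, so no relink is needed. A rough sketch (editorial; buffer sizing and rasterizer discard are omitted), assuming prog writes a varying named "out_pos" and tfb is a buffer object large enough for the captured vertices:

#define GL_GLEXT_PROTOTYPES
#include <GL/gl.h>
#include <GL/glext.h>

static void capture_positions(GLuint prog, GLuint tfb, GLsizei vertex_count)
{
    GLint loc = glGetVaryingLocationNV(prog, "out_pos");

    /* Stream the selected varying, interleaved, into binding point 0. */
    glTransformFeedbackVaryingsNV(prog, 1, &loc, GL_INTERLEAVED_ATTRIBS_NV);
    glBindBufferBaseNV(GL_TRANSFORM_FEEDBACK_BUFFER_NV, 0, tfb);

    glUseProgram(prog);
    glBeginTransformFeedbackNV(GL_POINTS);
    glDrawArrays(GL_POINTS, 0, vertex_count);
    glEndTransformFeedbackNV();
}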
+
+#ifndef GL_EXT_bindable_uniform
+#define GL_EXT_bindable_uniform 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glUniformBufferEXT (GLuint, GLint, GLuint);
+GLAPI GLint APIENTRY glGetUniformBufferSizeEXT (GLuint, GLint);
+GLAPI GLintptr APIENTRY glGetUniformOffsetEXT (GLuint, GLint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLUNIFORMBUFFEREXTPROC) (GLuint program, GLint location, GLuint buffer);
+typedef GLint (APIENTRYP PFNGLGETUNIFORMBUFFERSIZEEXTPROC) (GLuint program, GLint location);
+typedef GLintptr (APIENTRYP PFNGLGETUNIFORMOFFSETEXTPROC) (GLuint program, GLint location);
+#endif
+
+#ifndef GL_EXT_texture_integer
+#define GL_EXT_texture_integer 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glTexParameterIivEXT (GLenum, GLenum, const GLint *);
+GLAPI void APIENTRY glTexParameterIuivEXT (GLenum, GLenum, const GLuint *);
+GLAPI void APIENTRY glGetTexParameterIivEXT (GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetTexParameterIuivEXT (GLenum, GLenum, GLuint *);
+GLAPI void APIENTRY glClearColorIiEXT (GLint, GLint, GLint, GLint);
+GLAPI void APIENTRY glClearColorIuiEXT (GLuint, GLuint, GLuint, GLuint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLTEXPARAMETERIIVEXTPROC) (GLenum target, GLenum pname, const GLint *params);
+typedef void (APIENTRYP PFNGLTEXPARAMETERIUIVEXTPROC) (GLenum target, GLenum pname, const GLuint *params);
+typedef void (APIENTRYP PFNGLGETTEXPARAMETERIIVEXTPROC) (GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETTEXPARAMETERIUIVEXTPROC) (GLenum target, GLenum pname, GLuint *params);
+typedef void (APIENTRYP PFNGLCLEARCOLORIIEXTPROC) (GLint red, GLint green, GLint blue, GLint alpha);
+typedef void (APIENTRYP PFNGLCLEARCOLORIUIEXTPROC) (GLuint red, GLuint green, GLuint blue, GLuint alpha);
+#endif
+
+#ifndef GL_GREMEDY_frame_terminator
+#define GL_GREMEDY_frame_terminator 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glFrameTerminatorGREMEDY (void);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLFRAMETERMINATORGREMEDYPROC) (void);
+#endif
+
+#ifndef GL_NV_conditional_render
+#define GL_NV_conditional_render 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBeginConditionalRenderNV (GLuint, GLenum);
+GLAPI void APIENTRY glEndConditionalRenderNV (void);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLBEGINCONDITIONALRENDERNVPROC) (GLuint id, GLenum mode);
+typedef void (APIENTRYP PFNGLENDCONDITIONALRENDERNVPROC) (void);
+#endif
+
+#ifndef GL_NV_present_video
+#define GL_NV_present_video 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glPresentFrameKeyedNV (GLuint, GLuint64EXT, GLuint, GLuint, GLenum, GLenum, GLuint, GLuint, GLenum, GLuint, GLuint);
+GLAPI void APIENTRY glPresentFrameDualFillNV (GLuint, GLuint64EXT, GLuint, GLuint, GLenum, GLenum, GLuint, GLenum, GLuint, GLenum, GLuint, GLenum, GLuint);
+GLAPI void APIENTRY glGetVideoivNV (GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glGetVideouivNV (GLuint, GLenum, GLuint *);
+GLAPI void APIENTRY glGetVideoi64vNV (GLuint, GLenum, GLint64EXT *);
+GLAPI void APIENTRY glGetVideoui64vNV (GLuint, GLenum, GLuint64EXT *);
+GLAPI void APIENTRY glVideoParameterivNV (GLuint, GLenum, const GLint *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLPRESENTFRAMEKEYEDNVPROC) (GLuint video_slot, GLuint64EXT minPresentTime, GLuint beginPresentTimeId, GLuint presentDurationId, GLenum type, GLenum target0, GLuint fill0, GLuint key0, GLenum target1, GLuint fill1, GLuint key1);
+typedef void (APIENTRYP PFNGLPRESENTFRAMEDUALFILLNVPROC) (GLuint video_slot, GLuint64EXT minPresentTime, GLuint beginPresentTimeId, GLuint presentDurationId, GLenum type, GLenum target0, GLuint fill0, GLenum target1, GLuint fill1, GLenum target2, GLuint fill2, GLenum target3, GLuint fill3);
+typedef void (APIENTRYP PFNGLGETVIDEOIVNVPROC) (GLuint video_slot, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETVIDEOUIVNVPROC) (GLuint video_slot, GLenum pname, GLuint *params);
+typedef void (APIENTRYP PFNGLGETVIDEOI64VNVPROC) (GLuint video_slot, GLenum pname, GLint64EXT *params);
+typedef void (APIENTRYP PFNGLGETVIDEOUI64VNVPROC) (GLuint video_slot, GLenum pname, GLuint64EXT *params);
+typedef void (APIENTRYP PFNGLVIDEOPARAMETERIVNVPROC) (GLuint video_slot, GLenum pname, const GLint *params);
+#endif
+
+#ifndef GL_EXT_transform_feedback
+#define GL_EXT_transform_feedback 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBeginTransformFeedbackEXT (GLenum);
+GLAPI void APIENTRY glEndTransformFeedbackEXT (void);
+GLAPI void APIENTRY glBindBufferRangeEXT (GLenum, GLuint, GLuint, GLintptr, GLsizeiptr);
+GLAPI void APIENTRY glBindBufferoffsetEXT (GLenum, GLuint, GLuint, GLintptr);
+GLAPI void APIENTRY glBindBufferBaseEXT (GLenum, GLuint, GLuint);
+GLAPI void APIENTRY glTransformFeedbackVaryingsEXT (GLuint, GLsizei, const GLchar* *, GLenum);
+GLAPI void APIENTRY glGetTransformFeedbackVaryingEXT (GLuint, GLuint, GLsizei, GLsizei *, GLsizei *, GLenum *, GLchar *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLBEGINTRANSFORMFEEDBACKEXTPROC) (GLenum primitiveMode);
+typedef void (APIENTRYP PFNGLENDTRANSFORMFEEDBACKEXTPROC) (void);
+typedef void (APIENTRYP PFNGLBINDBUFFERRANGEEXTPROC) (GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size);
+typedef void (APIENTRYP PFNGLBINDBUFFEROFFSETEXTPROC) (GLenum target, GLuint index, GLuint buffer, GLintptr offset);
+typedef void (APIENTRYP PFNGLBINDBUFFERBASEEXTPROC) (GLenum target, GLuint index, GLuint buffer);
+typedef void (APIENTRYP PFNGLTRANSFORMFEEDBACKVARYINGSEXTPROC) (GLuint program, GLsizei count, const GLchar* *varyings, GLenum bufferMode);
+typedef void (APIENTRYP PFNGLGETTRANSFORMFEEDBACKVARYINGEXTPROC) (GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLsizei *size, GLenum *type, GLchar *name);
+#endif
+
+#ifndef GL_EXT_direct_state_access
+#define GL_EXT_direct_state_access 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glClientAttribDefaultEXT (GLbitfield);
+GLAPI void APIENTRY glPushClientAttribDefaultEXT (GLbitfield);
+GLAPI void APIENTRY glMatrixLoadfEXT (GLenum, const GLfloat *);
+GLAPI void APIENTRY glMatrixLoaddEXT (GLenum, const GLdouble *);
+GLAPI void APIENTRY glMatrixMultfEXT (GLenum, const GLfloat *);
+GLAPI void APIENTRY glMatrixMultdEXT (GLenum, const GLdouble *);
+GLAPI void APIENTRY glMatrixLoadIdentityEXT (GLenum);
+GLAPI void APIENTRY glMatrixRotatefEXT (GLenum, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glMatrixRotatedEXT (GLenum, GLdouble, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glMatrixScalefEXT (GLenum, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glMatrixScaledEXT (GLenum, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glMatrixTranslatefEXT (GLenum, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glMatrixTranslatedEXT (GLenum, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glMatrixFrustumEXT (GLenum, GLdouble, GLdouble, GLdouble, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glMatrixOrthoEXT (GLenum, GLdouble, GLdouble, GLdouble, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glMatrixPopEXT (GLenum);
+GLAPI void APIENTRY glMatrixPushEXT (GLenum);
+GLAPI void APIENTRY glMatrixLoadTransposefEXT (GLenum, const GLfloat *);
+GLAPI void APIENTRY glMatrixLoadTransposedEXT (GLenum, const GLdouble *);
+GLAPI void APIENTRY glMatrixMultTransposefEXT (GLenum, const GLfloat *);
+GLAPI void APIENTRY glMatrixMultTransposedEXT (GLenum, const GLdouble *);
+GLAPI void APIENTRY glTextureParameterfEXT (GLuint, GLenum, GLenum, GLfloat);
+GLAPI void APIENTRY glTextureParameterfvEXT (GLuint, GLenum, GLenum, const GLfloat *);
+GLAPI void APIENTRY glTextureParameteriEXT (GLuint, GLenum, GLenum, GLint);
+GLAPI void APIENTRY glTextureParameterivEXT (GLuint, GLenum, GLenum, const GLint *);
+GLAPI void APIENTRY glTextureImage1DEXT (GLuint, GLenum, GLint, GLenum, GLsizei, GLint, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glTextureImage2DEXT (GLuint, GLenum, GLint, GLenum, GLsizei, GLsizei, GLint, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glTextureSubImage1DEXT (GLuint, GLenum, GLint, GLint, GLsizei, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glTextureSubImage2DEXT (GLuint, GLenum, GLint, GLint, GLint, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glCopyTextureImage1DEXT (GLuint, GLenum, GLint, GLenum, GLint, GLint, GLsizei, GLint);
+GLAPI void APIENTRY glCopyTextureImage2DEXT (GLuint, GLenum, GLint, GLenum, GLint, GLint, GLsizei, GLsizei, GLint);
+GLAPI void APIENTRY glCopyTextureSubImage1DEXT (GLuint, GLenum, GLint, GLint, GLint, GLint, GLsizei);
+GLAPI void APIENTRY glCopyTextureSubImage2DEXT (GLuint, GLenum, GLint, GLint, GLint, GLint, GLint, GLsizei, GLsizei);
+GLAPI void APIENTRY glGetTextureImageEXT (GLuint, GLenum, GLint, GLenum, GLenum, GLvoid *);
+GLAPI void APIENTRY glGetTextureParameterfvEXT (GLuint, GLenum, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetTextureParameterivEXT (GLuint, GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetTextureLevelParameterfvEXT (GLuint, GLenum, GLint, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetTextureLevelParameterivEXT (GLuint, GLenum, GLint, GLenum, GLint *);
+GLAPI void APIENTRY glTextureImage3DEXT (GLuint, GLenum, GLint, GLenum, GLsizei, GLsizei, GLsizei, GLint, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glTextureSubImage3DEXT (GLuint, GLenum, GLint, GLint, GLint, GLint, GLsizei, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glCopyTextureSubImage3DEXT (GLuint, GLenum, GLint, GLint, GLint, GLint, GLint, GLint, GLsizei, GLsizei);
+GLAPI void APIENTRY glMultiTexParameterfEXT (GLenum, GLenum, GLenum, GLfloat);
+GLAPI void APIENTRY glMultiTexParameterfvEXT (GLenum, GLenum, GLenum, const GLfloat *);
+GLAPI void APIENTRY glMultiTexParameteriEXT (GLenum, GLenum, GLenum, GLint);
+GLAPI void APIENTRY glMultiTexParameterivEXT (GLenum, GLenum, GLenum, const GLint *);
+GLAPI void APIENTRY glMultiTexImage1DEXT (GLenum, GLenum, GLint, GLenum, GLsizei, GLint, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glMultiTexImage2DEXT (GLenum, GLenum, GLint, GLenum, GLsizei, GLsizei, GLint, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glMultiTexSubImage1DEXT (GLenum, GLenum, GLint, GLint, GLsizei, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glMultiTexSubImage2DEXT (GLenum, GLenum, GLint, GLint, GLint, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glCopyMultiTexImage1DEXT (GLenum, GLenum, GLint, GLenum, GLint, GLint, GLsizei, GLint);
+GLAPI void APIENTRY glCopyMultiTexImage2DEXT (GLenum, GLenum, GLint, GLenum, GLint, GLint, GLsizei, GLsizei, GLint);
+GLAPI void APIENTRY glCopyMultiTexSubImage1DEXT (GLenum, GLenum, GLint, GLint, GLint, GLint, GLsizei);
+GLAPI void APIENTRY glCopyMultiTexSubImage2DEXT (GLenum, GLenum, GLint, GLint, GLint, GLint, GLint, GLsizei, GLsizei);
+GLAPI void APIENTRY glGetMultiTexImageEXT (GLenum, GLenum, GLint, GLenum, GLenum, GLvoid *);
+GLAPI void APIENTRY glGetMultiTexParameterfvEXT (GLenum, GLenum, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetMultiTexParameterivEXT (GLenum, GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetMultiTexLevelParameterfvEXT (GLenum, GLenum, GLint, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetMultiTexLevelParameterivEXT (GLenum, GLenum, GLint, GLenum, GLint *);
+GLAPI void APIENTRY glMultiTexImage3DEXT (GLenum, GLenum, GLint, GLenum, GLsizei, GLsizei, GLsizei, GLint, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glMultiTexSubImage3DEXT (GLenum, GLenum, GLint, GLint, GLint, GLint, GLsizei, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *);
+GLAPI void APIENTRY glCopyMultiTexSubImage3DEXT (GLenum, GLenum, GLint, GLint, GLint, GLint, GLint, GLint, GLsizei, GLsizei);
+GLAPI void APIENTRY glBindMultiTextureEXT (GLenum, GLenum, GLuint);
+GLAPI void APIENTRY glEnableClientStateIndexedEXT (GLenum, GLuint);
+GLAPI void APIENTRY glDisableClientStateIndexedEXT (GLenum, GLuint);
+GLAPI void APIENTRY glMultiTexCoordPointerEXT (GLenum, GLint, GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glMultiTexEnvfEXT (GLenum, GLenum, GLenum, GLfloat);
+GLAPI void APIENTRY glMultiTexEnvfvEXT (GLenum, GLenum, GLenum, const GLfloat *);
+GLAPI void APIENTRY glMultiTexEnviEXT (GLenum, GLenum, GLenum, GLint);
+GLAPI void APIENTRY glMultiTexEnvivEXT (GLenum, GLenum, GLenum, const GLint *);
+GLAPI void APIENTRY glMultiTexGendEXT (GLenum, GLenum, GLenum, GLdouble);
+GLAPI void APIENTRY glMultiTexGendvEXT (GLenum, GLenum, GLenum, const GLdouble *);
+GLAPI void APIENTRY glMultiTexGenfEXT (GLenum, GLenum, GLenum, GLfloat);
+GLAPI void APIENTRY glMultiTexGenfvEXT (GLenum, GLenum, GLenum, const GLfloat *);
+GLAPI void APIENTRY glMultiTexGeniEXT (GLenum, GLenum, GLenum, GLint);
+GLAPI void APIENTRY glMultiTexGenivEXT (GLenum, GLenum, GLenum, const GLint *);
+GLAPI void APIENTRY glGetMultiTexEnvfvEXT (GLenum, GLenum, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetMultiTexEnvivEXT (GLenum, GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetMultiTexGendvEXT (GLenum, GLenum, GLenum, GLdouble *);
+GLAPI void APIENTRY glGetMultiTexGenfvEXT (GLenum, GLenum, GLenum, GLfloat *);
+GLAPI void APIENTRY glGetMultiTexGenivEXT (GLenum, GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetFloatIndexedvEXT (GLenum, GLuint, GLfloat *);
+GLAPI void APIENTRY glGetDoubleIndexedvEXT (GLenum, GLuint, GLdouble *);
+GLAPI void APIENTRY glGetPointerIndexedvEXT (GLenum, GLuint, GLvoid* *);
+GLAPI void APIENTRY glCompressedTextureImage3DEXT (GLuint, GLenum, GLint, GLenum, GLsizei, GLsizei, GLsizei, GLint, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glCompressedTextureImage2DEXT (GLuint, GLenum, GLint, GLenum, GLsizei, GLsizei, GLint, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glCompressedTextureImage1DEXT (GLuint, GLenum, GLint, GLenum, GLsizei, GLint, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glCompressedTextureSubImage3DEXT (GLuint, GLenum, GLint, GLint, GLint, GLint, GLsizei, GLsizei, GLsizei, GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glCompressedTextureSubImage2DEXT (GLuint, GLenum, GLint, GLint, GLint, GLsizei, GLsizei, GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glCompressedTextureSubImage1DEXT (GLuint, GLenum, GLint, GLint, GLsizei, GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glGetCompressedTextureImageEXT (GLuint, GLenum, GLint, GLvoid *);
+GLAPI void APIENTRY glCompressedMultiTexImage3DEXT (GLenum, GLenum, GLint, GLenum, GLsizei, GLsizei, GLsizei, GLint, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glCompressedMultiTexImage2DEXT (GLenum, GLenum, GLint, GLenum, GLsizei, GLsizei, GLint, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glCompressedMultiTexImage1DEXT (GLenum, GLenum, GLint, GLenum, GLsizei, GLint, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glCompressedMultiTexSubImage3DEXT (GLenum, GLenum, GLint, GLint, GLint, GLint, GLsizei, GLsizei, GLsizei, GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glCompressedMultiTexSubImage2DEXT (GLenum, GLenum, GLint, GLint, GLint, GLsizei, GLsizei, GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glCompressedMultiTexSubImage1DEXT (GLenum, GLenum, GLint, GLint, GLsizei, GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glGetCompressedMultiTexImageEXT (GLenum, GLenum, GLint, GLvoid *);
+GLAPI void APIENTRY glNamedProgramStringEXT (GLuint, GLenum, GLenum, GLsizei, const GLvoid *);
+GLAPI void APIENTRY glNamedProgramLocalParameter4dEXT (GLuint, GLenum, GLuint, GLdouble, GLdouble, GLdouble, GLdouble);
+GLAPI void APIENTRY glNamedProgramLocalParameter4dvEXT (GLuint, GLenum, GLuint, const GLdouble *);
+GLAPI void APIENTRY glNamedProgramLocalParameter4fEXT (GLuint, GLenum, GLuint, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glNamedProgramLocalParameter4fvEXT (GLuint, GLenum, GLuint, const GLfloat *);
+GLAPI void APIENTRY glGetNamedProgramLocalParameterdvEXT (GLuint, GLenum, GLuint, GLdouble *);
+GLAPI void APIENTRY glGetNamedProgramLocalParameterfvEXT (GLuint, GLenum, GLuint, GLfloat *);
+GLAPI void APIENTRY glGetNamedProgramivEXT (GLuint, GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetNamedProgramStringEXT (GLuint, GLenum, GLenum, GLvoid *);
+GLAPI void APIENTRY glNamedProgramLocalParameters4fvEXT (GLuint, GLenum, GLuint, GLsizei, const GLfloat *);
+GLAPI void APIENTRY glNamedProgramLocalParameterI4iEXT (GLuint, GLenum, GLuint, GLint, GLint, GLint, GLint);
+GLAPI void APIENTRY glNamedProgramLocalParameterI4ivEXT (GLuint, GLenum, GLuint, const GLint *);
+GLAPI void APIENTRY glNamedProgramLocalParametersI4ivEXT (GLuint, GLenum, GLuint, GLsizei, const GLint *);
+GLAPI void APIENTRY glNamedProgramLocalParameterI4uiEXT (GLuint, GLenum, GLuint, GLuint, GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glNamedProgramLocalParameterI4uivEXT (GLuint, GLenum, GLuint, const GLuint *);
+GLAPI void APIENTRY glNamedProgramLocalParametersI4uivEXT (GLuint, GLenum, GLuint, GLsizei, const GLuint *);
+GLAPI void APIENTRY glGetNamedProgramLocalParameterIivEXT (GLuint, GLenum, GLuint, GLint *);
+GLAPI void APIENTRY glGetNamedProgramLocalParameterIuivEXT (GLuint, GLenum, GLuint, GLuint *);
+GLAPI void APIENTRY glTextureParameterIivEXT (GLuint, GLenum, GLenum, const GLint *);
+GLAPI void APIENTRY glTextureParameterIuivEXT (GLuint, GLenum, GLenum, const GLuint *);
+GLAPI void APIENTRY glGetTextureParameterIivEXT (GLuint, GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetTextureParameterIuivEXT (GLuint, GLenum, GLenum, GLuint *);
+GLAPI void APIENTRY glMultiTexParameterIivEXT (GLenum, GLenum, GLenum, const GLint *);
+GLAPI void APIENTRY glMultiTexParameterIuivEXT (GLenum, GLenum, GLenum, const GLuint *);
+GLAPI void APIENTRY glGetMultiTexParameterIivEXT (GLenum, GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGetMultiTexParameterIuivEXT (GLenum, GLenum, GLenum, GLuint *);
+GLAPI void APIENTRY glProgramUniform1fEXT (GLuint, GLint, GLfloat);
+GLAPI void APIENTRY glProgramUniform2fEXT (GLuint, GLint, GLfloat, GLfloat);
+GLAPI void APIENTRY glProgramUniform3fEXT (GLuint, GLint, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glProgramUniform4fEXT (GLuint, GLint, GLfloat, GLfloat, GLfloat, GLfloat);
+GLAPI void APIENTRY glProgramUniform1iEXT (GLuint, GLint, GLint);
+GLAPI void APIENTRY glProgramUniform2iEXT (GLuint, GLint, GLint, GLint);
+GLAPI void APIENTRY glProgramUniform3iEXT (GLuint, GLint, GLint, GLint, GLint);
+GLAPI void APIENTRY glProgramUniform4iEXT (GLuint, GLint, GLint, GLint, GLint, GLint);
+GLAPI void APIENTRY glProgramUniform1fvEXT (GLuint, GLint, GLsizei, const GLfloat *);
+GLAPI void APIENTRY glProgramUniform2fvEXT (GLuint, GLint, GLsizei, const GLfloat *);
+GLAPI void APIENTRY glProgramUniform3fvEXT (GLuint, GLint, GLsizei, const GLfloat *);
+GLAPI void APIENTRY glProgramUniform4fvEXT (GLuint, GLint, GLsizei, const GLfloat *);
+GLAPI void APIENTRY glProgramUniform1ivEXT (GLuint, GLint, GLsizei, const GLint *);
+GLAPI void APIENTRY glProgramUniform2ivEXT (GLuint, GLint, GLsizei, const GLint *);
+GLAPI void APIENTRY glProgramUniform3ivEXT (GLuint, GLint, GLsizei, const GLint *);
+GLAPI void APIENTRY glProgramUniform4ivEXT (GLuint, GLint, GLsizei, const GLint *);
+GLAPI void APIENTRY glProgramUniformMatrix2fvEXT (GLuint, GLint, GLsizei, GLboolean, const GLfloat *);
+GLAPI void APIENTRY glProgramUniformMatrix3fvEXT (GLuint, GLint, GLsizei, GLboolean, const GLfloat *);
+GLAPI void APIENTRY glProgramUniformMatrix4fvEXT (GLuint, GLint, GLsizei, GLboolean, const GLfloat *);
+GLAPI void APIENTRY glProgramUniformMatrix2x3fvEXT (GLuint, GLint, GLsizei, GLboolean, const GLfloat *);
+GLAPI void APIENTRY glProgramUniformMatrix3x2fvEXT (GLuint, GLint, GLsizei, GLboolean, const GLfloat *);
+GLAPI void APIENTRY glProgramUniformMatrix2x4fvEXT (GLuint, GLint, GLsizei, GLboolean, const GLfloat *);
+GLAPI void APIENTRY glProgramUniformMatrix4x2fvEXT (GLuint, GLint, GLsizei, GLboolean, const GLfloat *);
+GLAPI void APIENTRY glProgramUniformMatrix3x4fvEXT (GLuint, GLint, GLsizei, GLboolean, const GLfloat *);
+GLAPI void APIENTRY glProgramUniformMatrix4x3fvEXT (GLuint, GLint, GLsizei, GLboolean, const GLfloat *);
+GLAPI void APIENTRY glProgramUniform1uiEXT (GLuint, GLint, GLuint);
+GLAPI void APIENTRY glProgramUniform2uiEXT (GLuint, GLint, GLuint, GLuint);
+GLAPI void APIENTRY glProgramUniform3uiEXT (GLuint, GLint, GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glProgramUniform4uiEXT (GLuint, GLint, GLuint, GLuint, GLuint, GLuint);
+GLAPI void APIENTRY glProgramUniform1uivEXT (GLuint, GLint, GLsizei, const GLuint *);
+GLAPI void APIENTRY glProgramUniform2uivEXT (GLuint, GLint, GLsizei, const GLuint *);
+GLAPI void APIENTRY glProgramUniform3uivEXT (GLuint, GLint, GLsizei, const GLuint *);
+GLAPI void APIENTRY glProgramUniform4uivEXT (GLuint, GLint, GLsizei, const GLuint *);
+GLAPI void APIENTRY glNamedBufferDataEXT (GLuint, GLsizeiptr, const GLvoid *, GLenum);
+GLAPI void APIENTRY glNamedBufferSubDataEXT (GLuint, GLintptr, GLsizeiptr, const GLvoid *);
+GLAPI GLvoid* APIENTRY glMapNamedBufferEXT (GLuint, GLenum);
+GLAPI GLboolean APIENTRY glUnmapNamedBufferEXT (GLuint);
+GLAPI void APIENTRY glGetNamedBufferParameterivEXT (GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glGetNamedBufferPointervEXT (GLuint, GLenum, GLvoid* *);
+GLAPI void APIENTRY glGetNamedBufferSubDataEXT (GLuint, GLintptr, GLsizeiptr, GLvoid *);
+GLAPI void APIENTRY glTextureBufferEXT (GLuint, GLenum, GLenum, GLuint);
+GLAPI void APIENTRY glMultiTexBufferEXT (GLenum, GLenum, GLenum, GLuint);
+GLAPI void APIENTRY glNamedRenderbufferStorageEXT (GLuint, GLenum, GLsizei, GLsizei);
+GLAPI void APIENTRY glGetNamedRenderbufferParameterivEXT (GLuint, GLenum, GLint *);
+GLAPI GLenum APIENTRY glCheckNamedFramebufferStatusEXT (GLuint, GLenum);
+GLAPI void APIENTRY glNamedFramebufferTexture1DEXT (GLuint, GLenum, GLenum, GLuint, GLint);
+GLAPI void APIENTRY glNamedFramebufferTexture2DEXT (GLuint, GLenum, GLenum, GLuint, GLint);
+GLAPI void APIENTRY glNamedFramebufferTexture3DEXT (GLuint, GLenum, GLenum, GLuint, GLint, GLint);
+GLAPI void APIENTRY glNamedFramebufferRenderbufferEXT (GLuint, GLenum, GLenum, GLuint);
+GLAPI void APIENTRY glGetNamedFramebufferAttachmentParameterivEXT (GLuint, GLenum, GLenum, GLint *);
+GLAPI void APIENTRY glGenerateTextureMipmapEXT (GLuint, GLenum);
+GLAPI void APIENTRY glGenerateMultiTexMipmapEXT (GLenum, GLenum);
+GLAPI void APIENTRY glFramebufferDrawBufferEXT (GLuint, GLenum);
+GLAPI void APIENTRY glFramebufferDrawBuffersEXT (GLuint, GLsizei, const GLenum *);
+GLAPI void APIENTRY glFramebufferReadBufferEXT (GLuint, GLenum);
+GLAPI void APIENTRY glGetFramebufferParameterivEXT (GLuint, GLenum, GLint *);
+GLAPI void APIENTRY glNamedRenderbufferStorageMultisampleEXT (GLuint, GLsizei, GLenum, GLsizei, GLsizei);
+GLAPI void APIENTRY glNamedRenderbufferStorageMultisampleCoverageEXT (GLuint, GLsizei, GLsizei, GLenum, GLsizei, GLsizei);
+GLAPI void APIENTRY glNamedFramebufferTextureEXT (GLuint, GLenum, GLuint, GLint);
+GLAPI void APIENTRY glNamedFramebufferTextureLayerEXT (GLuint, GLenum, GLuint, GLint, GLint);
+GLAPI void APIENTRY glNamedFramebufferTextureFaceEXT (GLuint, GLenum, GLuint, GLint, GLenum);
+GLAPI void APIENTRY glTextureRenderbufferEXT (GLuint, GLenum, GLuint);
+GLAPI void APIENTRY glMultiTexRenderbufferEXT (GLenum, GLenum, GLuint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLCLIENTATTRIBDEFAULTEXTPROC) (GLbitfield mask);
+typedef void (APIENTRYP PFNGLPUSHCLIENTATTRIBDEFAULTEXTPROC) (GLbitfield mask);
+typedef void (APIENTRYP PFNGLMATRIXLOADFEXTPROC) (GLenum mode, const GLfloat *m);
+typedef void (APIENTRYP PFNGLMATRIXLOADDEXTPROC) (GLenum mode, const GLdouble *m);
+typedef void (APIENTRYP PFNGLMATRIXMULTFEXTPROC) (GLenum mode, const GLfloat *m);
+typedef void (APIENTRYP PFNGLMATRIXMULTDEXTPROC) (GLenum mode, const GLdouble *m);
+typedef void (APIENTRYP PFNGLMATRIXLOADIDENTITYEXTPROC) (GLenum mode);
+typedef void (APIENTRYP PFNGLMATRIXROTATEFEXTPROC) (GLenum mode, GLfloat angle, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLMATRIXROTATEDEXTPROC) (GLenum mode, GLdouble angle, GLdouble x, GLdouble y, GLdouble z);
+typedef void (APIENTRYP PFNGLMATRIXSCALEFEXTPROC) (GLenum mode, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLMATRIXSCALEDEXTPROC) (GLenum mode, GLdouble x, GLdouble y, GLdouble z);
+typedef void (APIENTRYP PFNGLMATRIXTRANSLATEFEXTPROC) (GLenum mode, GLfloat x, GLfloat y, GLfloat z);
+typedef void (APIENTRYP PFNGLMATRIXTRANSLATEDEXTPROC) (GLenum mode, GLdouble x, GLdouble y, GLdouble z);
+typedef void (APIENTRYP PFNGLMATRIXFRUSTUMEXTPROC) (GLenum mode, GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear, GLdouble zFar);
+typedef void (APIENTRYP PFNGLMATRIXORTHOEXTPROC) (GLenum mode, GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear, GLdouble zFar);
+typedef void (APIENTRYP PFNGLMATRIXPOPEXTPROC) (GLenum mode);
+typedef void (APIENTRYP PFNGLMATRIXPUSHEXTPROC) (GLenum mode);
+typedef void (APIENTRYP PFNGLMATRIXLOADTRANSPOSEFEXTPROC) (GLenum mode, const GLfloat *m);
+typedef void (APIENTRYP PFNGLMATRIXLOADTRANSPOSEDEXTPROC) (GLenum mode, const GLdouble *m);
+typedef void (APIENTRYP PFNGLMATRIXMULTTRANSPOSEFEXTPROC) (GLenum mode, const GLfloat *m);
+typedef void (APIENTRYP PFNGLMATRIXMULTTRANSPOSEDEXTPROC) (GLenum mode, const GLdouble *m);
+typedef void (APIENTRYP PFNGLTEXTUREPARAMETERFEXTPROC) (GLuint texture, GLenum target, GLenum pname, GLfloat param);
+typedef void (APIENTRYP PFNGLTEXTUREPARAMETERFVEXTPROC) (GLuint texture, GLenum target, GLenum pname, const GLfloat *params);
+typedef void (APIENTRYP PFNGLTEXTUREPARAMETERIEXTPROC) (GLuint texture, GLenum target, GLenum pname, GLint param);
+typedef void (APIENTRYP PFNGLTEXTUREPARAMETERIVEXTPROC) (GLuint texture, GLenum target, GLenum pname, const GLint *params);
+typedef void (APIENTRYP PFNGLTEXTUREIMAGE1DEXTPROC) (GLuint texture, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLint border, GLenum format, GLenum type, const GLvoid *pixels);
+typedef void (APIENTRYP PFNGLTEXTUREIMAGE2DEXTPROC) (GLuint texture, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid *pixels);
+typedef void (APIENTRYP PFNGLTEXTURESUBIMAGE1DEXTPROC) (GLuint texture, GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const GLvoid *pixels);
+typedef void (APIENTRYP PFNGLTEXTURESUBIMAGE2DEXTPROC) (GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels);
+typedef void (APIENTRYP PFNGLCOPYTEXTUREIMAGE1DEXTPROC) (GLuint texture, GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLint border);
+typedef void (APIENTRYP PFNGLCOPYTEXTUREIMAGE2DEXTPROC) (GLuint texture, GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border);
+typedef void (APIENTRYP PFNGLCOPYTEXTURESUBIMAGE1DEXTPROC) (GLuint texture, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width);
+typedef void (APIENTRYP PFNGLCOPYTEXTURESUBIMAGE2DEXTPROC) (GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height);
+typedef void (APIENTRYP PFNGLGETTEXTUREIMAGEEXTPROC) (GLuint texture, GLenum target, GLint level, GLenum format, GLenum type, GLvoid *pixels);
+typedef void (APIENTRYP PFNGLGETTEXTUREPARAMETERFVEXTPROC) (GLuint texture, GLenum target, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETTEXTUREPARAMETERIVEXTPROC) (GLuint texture, GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETTEXTURELEVELPARAMETERFVEXTPROC) (GLuint texture, GLenum target, GLint level, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETTEXTURELEVELPARAMETERIVEXTPROC) (GLuint texture, GLenum target, GLint level, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLTEXTUREIMAGE3DEXTPROC) (GLuint texture, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const GLvoid *pixels);
+typedef void (APIENTRYP PFNGLTEXTURESUBIMAGE3DEXTPROC) (GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const GLvoid *pixels);
+typedef void (APIENTRYP PFNGLCOPYTEXTURESUBIMAGE3DEXTPROC) (GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height);
+typedef void (APIENTRYP PFNGLMULTITEXPARAMETERFEXTPROC) (GLenum texunit, GLenum target, GLenum pname, GLfloat param);
+typedef void (APIENTRYP PFNGLMULTITEXPARAMETERFVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, const GLfloat *params);
+typedef void (APIENTRYP PFNGLMULTITEXPARAMETERIEXTPROC) (GLenum texunit, GLenum target, GLenum pname, GLint param);
+typedef void (APIENTRYP PFNGLMULTITEXPARAMETERIVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, const GLint *params);
+typedef void (APIENTRYP PFNGLMULTITEXIMAGE1DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLint border, GLenum format, GLenum type, const GLvoid *pixels);
+typedef void (APIENTRYP PFNGLMULTITEXIMAGE2DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid *pixels);
+typedef void (APIENTRYP PFNGLMULTITEXSUBIMAGE1DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const GLvoid *pixels);
+typedef void (APIENTRYP PFNGLMULTITEXSUBIMAGE2DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels);
+typedef void (APIENTRYP PFNGLCOPYMULTITEXIMAGE1DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLint border);
+typedef void (APIENTRYP PFNGLCOPYMULTITEXIMAGE2DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border);
+typedef void (APIENTRYP PFNGLCOPYMULTITEXSUBIMAGE1DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width);
+typedef void (APIENTRYP PFNGLCOPYMULTITEXSUBIMAGE2DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height);
+typedef void (APIENTRYP PFNGLGETMULTITEXIMAGEEXTPROC) (GLenum texunit, GLenum target, GLint level, GLenum format, GLenum type, GLvoid *pixels);
+typedef void (APIENTRYP PFNGLGETMULTITEXPARAMETERFVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETMULTITEXPARAMETERIVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETMULTITEXLEVELPARAMETERFVEXTPROC) (GLenum texunit, GLenum target, GLint level, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETMULTITEXLEVELPARAMETERIVEXTPROC) (GLenum texunit, GLenum target, GLint level, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLMULTITEXIMAGE3DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const GLvoid *pixels);
+typedef void (APIENTRYP PFNGLMULTITEXSUBIMAGE3DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const GLvoid *pixels);
+typedef void (APIENTRYP PFNGLCOPYMULTITEXSUBIMAGE3DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height);
+typedef void (APIENTRYP PFNGLBINDMULTITEXTUREEXTPROC) (GLenum texunit, GLenum target, GLuint texture);
+typedef void (APIENTRYP PFNGLENABLECLIENTSTATEINDEXEDEXTPROC) (GLenum array, GLuint index);
+typedef void (APIENTRYP PFNGLDISABLECLIENTSTATEINDEXEDEXTPROC) (GLenum array, GLuint index);
+typedef void (APIENTRYP PFNGLMULTITEXCOORDPOINTEREXTPROC) (GLenum texunit, GLint size, GLenum type, GLsizei stride, const GLvoid *pointer);
+typedef void (APIENTRYP PFNGLMULTITEXENVFEXTPROC) (GLenum texunit, GLenum target, GLenum pname, GLfloat param);
+typedef void (APIENTRYP PFNGLMULTITEXENVFVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, const GLfloat *params);
+typedef void (APIENTRYP PFNGLMULTITEXENVIEXTPROC) (GLenum texunit, GLenum target, GLenum pname, GLint param);
+typedef void (APIENTRYP PFNGLMULTITEXENVIVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, const GLint *params);
+typedef void (APIENTRYP PFNGLMULTITEXGENDEXTPROC) (GLenum texunit, GLenum coord, GLenum pname, GLdouble param);
+typedef void (APIENTRYP PFNGLMULTITEXGENDVEXTPROC) (GLenum texunit, GLenum coord, GLenum pname, const GLdouble *params);
+typedef void (APIENTRYP PFNGLMULTITEXGENFEXTPROC) (GLenum texunit, GLenum coord, GLenum pname, GLfloat param);
+typedef void (APIENTRYP PFNGLMULTITEXGENFVEXTPROC) (GLenum texunit, GLenum coord, GLenum pname, const GLfloat *params);
+typedef void (APIENTRYP PFNGLMULTITEXGENIEXTPROC) (GLenum texunit, GLenum coord, GLenum pname, GLint param);
+typedef void (APIENTRYP PFNGLMULTITEXGENIVEXTPROC) (GLenum texunit, GLenum coord, GLenum pname, const GLint *params);
+typedef void (APIENTRYP PFNGLGETMULTITEXENVFVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETMULTITEXENVIVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETMULTITEXGENDVEXTPROC) (GLenum texunit, GLenum coord, GLenum pname, GLdouble *params);
+typedef void (APIENTRYP PFNGLGETMULTITEXGENFVEXTPROC) (GLenum texunit, GLenum coord, GLenum pname, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETMULTITEXGENIVEXTPROC) (GLenum texunit, GLenum coord, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETFLOATINDEXEDVEXTPROC) (GLenum target, GLuint index, GLfloat *data);
+typedef void (APIENTRYP PFNGLGETDOUBLEINDEXEDVEXTPROC) (GLenum target, GLuint index, GLdouble *data);
+typedef void (APIENTRYP PFNGLGETPOINTERINDEXEDVEXTPROC) (GLenum target, GLuint index, GLvoid* *data);
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXTUREIMAGE3DEXTPROC) (GLuint texture, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const GLvoid *bits);
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXTUREIMAGE2DEXTPROC) (GLuint texture, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const GLvoid *bits);
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXTUREIMAGE1DEXTPROC) (GLuint texture, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLint border, GLsizei imageSize, const GLvoid *bits);
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE3DEXTPROC) (GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const GLvoid *bits);
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE2DEXTPROC) (GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const GLvoid *bits);
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE1DEXTPROC) (GLuint texture, GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const GLvoid *bits);
+typedef void (APIENTRYP PFNGLGETCOMPRESSEDTEXTUREIMAGEEXTPROC) (GLuint texture, GLenum target, GLint lod, GLvoid *img);
+typedef void (APIENTRYP PFNGLCOMPRESSEDMULTITEXIMAGE3DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const GLvoid *bits);
+typedef void (APIENTRYP PFNGLCOMPRESSEDMULTITEXIMAGE2DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const GLvoid *bits);
+typedef void (APIENTRYP PFNGLCOMPRESSEDMULTITEXIMAGE1DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLint border, GLsizei imageSize, const GLvoid *bits);
+typedef void (APIENTRYP PFNGLCOMPRESSEDMULTITEXSUBIMAGE3DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const GLvoid *bits);
+typedef void (APIENTRYP PFNGLCOMPRESSEDMULTITEXSUBIMAGE2DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const GLvoid *bits);
+typedef void (APIENTRYP PFNGLCOMPRESSEDMULTITEXSUBIMAGE1DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const GLvoid *bits);
+typedef void (APIENTRYP PFNGLGETCOMPRESSEDMULTITEXIMAGEEXTPROC) (GLenum texunit, GLenum target, GLint lod, GLvoid *img);
+typedef void (APIENTRYP PFNGLNAMEDPROGRAMSTRINGEXTPROC) (GLuint program, GLenum target, GLenum format, GLsizei len, const GLvoid *string);
+typedef void (APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETER4DEXTPROC) (GLuint program, GLenum target, GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w);
+typedef void (APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETER4DVEXTPROC) (GLuint program, GLenum target, GLuint index, const GLdouble *params);
+typedef void (APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETER4FEXTPROC) (GLuint program, GLenum target, GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w);
+typedef void (APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETER4FVEXTPROC) (GLuint program, GLenum target, GLuint index, const GLfloat *params);
+typedef void (APIENTRYP PFNGLGETNAMEDPROGRAMLOCALPARAMETERDVEXTPROC) (GLuint program, GLenum target, GLuint index, GLdouble *params);
+typedef void (APIENTRYP PFNGLGETNAMEDPROGRAMLOCALPARAMETERFVEXTPROC) (GLuint program, GLenum target, GLuint index, GLfloat *params);
+typedef void (APIENTRYP PFNGLGETNAMEDPROGRAMIVEXTPROC) (GLuint program, GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETNAMEDPROGRAMSTRINGEXTPROC) (GLuint program, GLenum target, GLenum pname, GLvoid *string);
+typedef void (APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETERS4FVEXTPROC) (GLuint program, GLenum target, GLuint index, GLsizei count, const GLfloat *params);
+typedef void (APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETERI4IEXTPROC) (GLuint program, GLenum target, GLuint index, GLint x, GLint y, GLint z, GLint w);
+typedef void (APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETERI4IVEXTPROC) (GLuint program, GLenum target, GLuint index, const GLint *params);
+typedef void (APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETERSI4IVEXTPROC) (GLuint program, GLenum target, GLuint index, GLsizei count, const GLint *params);
+typedef void (APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETERI4UIEXTPROC) (GLuint program, GLenum target, GLuint index, GLuint x, GLuint y, GLuint z, GLuint w);
+typedef void (APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETERI4UIVEXTPROC) (GLuint program, GLenum target, GLuint index, const GLuint *params);
+typedef void (APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETERSI4UIVEXTPROC) (GLuint program, GLenum target, GLuint index, GLsizei count, const GLuint *params);
+typedef void (APIENTRYP PFNGLGETNAMEDPROGRAMLOCALPARAMETERIIVEXTPROC) (GLuint program, GLenum target, GLuint index, GLint *params);
+typedef void (APIENTRYP PFNGLGETNAMEDPROGRAMLOCALPARAMETERIUIVEXTPROC) (GLuint program, GLenum target, GLuint index, GLuint *params);
+typedef void (APIENTRYP PFNGLTEXTUREPARAMETERIIVEXTPROC) (GLuint texture, GLenum target, GLenum pname, const GLint *params);
+typedef void (APIENTRYP PFNGLTEXTUREPARAMETERIUIVEXTPROC) (GLuint texture, GLenum target, GLenum pname, const GLuint *params);
+typedef void (APIENTRYP PFNGLGETTEXTUREPARAMETERIIVEXTPROC) (GLuint texture, GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETTEXTUREPARAMETERIUIVEXTPROC) (GLuint texture, GLenum target, GLenum pname, GLuint *params);
+typedef void (APIENTRYP PFNGLMULTITEXPARAMETERIIVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, const GLint *params);
+typedef void (APIENTRYP PFNGLMULTITEXPARAMETERIUIVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, const GLuint *params);
+typedef void (APIENTRYP PFNGLGETMULTITEXPARAMETERIIVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETMULTITEXPARAMETERIUIVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, GLuint *params);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1FEXTPROC) (GLuint program, GLint location, GLfloat v0);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM2FEXTPROC) (GLuint program, GLint location, GLfloat v0, GLfloat v1);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3FEXTPROC) (GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM4FEXTPROC) (GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1IEXTPROC) (GLuint program, GLint location, GLint v0);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM2IEXTPROC) (GLuint program, GLint location, GLint v0, GLint v1);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3IEXTPROC) (GLuint program, GLint location, GLint v0, GLint v1, GLint v2);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM4IEXTPROC) (GLuint program, GLint location, GLint v0, GLint v1, GLint v2, GLint v3);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1FVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLfloat *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM2FVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLfloat *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3FVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLfloat *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM4FVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLfloat *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1IVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLint *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM2IVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLint *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3IVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLint *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM4IVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLint *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2X3FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3X2FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2X4FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4X2FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3X4FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4X3FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1UIEXTPROC) (GLuint program, GLint location, GLuint v0);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM2UIEXTPROC) (GLuint program, GLint location, GLuint v0, GLuint v1);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3UIEXTPROC) (GLuint program, GLint location, GLuint v0, GLuint v1, GLuint v2);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM4UIEXTPROC) (GLuint program, GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1UIVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLuint *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM2UIVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLuint *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3UIVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLuint *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM4UIVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLuint *value);
+typedef void (APIENTRYP PFNGLNAMEDBUFFERDATAEXTPROC) (GLuint buffer, GLsizeiptr size, const GLvoid *data, GLenum usage);
+typedef void (APIENTRYP PFNGLNAMEDBUFFERSUBDATAEXTPROC) (GLuint buffer, GLintptr offset, GLsizeiptr size, const GLvoid *data);
+typedef GLvoid* (APIENTRYP PFNGLMAPNAMEDBUFFEREXTPROC) (GLuint buffer, GLenum access);
+typedef GLboolean (APIENTRYP PFNGLUNMAPNAMEDBUFFEREXTPROC) (GLuint buffer);
+typedef void (APIENTRYP PFNGLGETNAMEDBUFFERPARAMETERIVEXTPROC) (GLuint buffer, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGETNAMEDBUFFERPOINTERVEXTPROC) (GLuint buffer, GLenum pname, GLvoid* *params);
+typedef void (APIENTRYP PFNGLGETNAMEDBUFFERSUBDATAEXTPROC) (GLuint buffer, GLintptr offset, GLsizeiptr size, GLvoid *data);
+typedef void (APIENTRYP PFNGLTEXTUREBUFFEREXTPROC) (GLuint texture, GLenum target, GLenum internalformat, GLuint buffer);
+typedef void (APIENTRYP PFNGLMULTITEXBUFFEREXTPROC) (GLenum texunit, GLenum target, GLenum internalformat, GLuint buffer);
+typedef void (APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEEXTPROC) (GLuint renderbuffer, GLenum internalformat, GLsizei width, GLsizei height);
+typedef void (APIENTRYP PFNGLGETNAMEDRENDERBUFFERPARAMETERIVEXTPROC) (GLuint renderbuffer, GLenum pname, GLint *params);
+typedef GLenum (APIENTRYP PFNGLCHECKNAMEDFRAMEBUFFERSTATUSEXTPROC) (GLuint framebuffer, GLenum target);
+typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTURE1DEXTPROC) (GLuint framebuffer, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
+typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTURE2DEXTPROC) (GLuint framebuffer, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
+typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTURE3DEXTPROC) (GLuint framebuffer, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLint zoffset);
+typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERRENDERBUFFEREXTPROC) (GLuint framebuffer, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer);
+typedef void (APIENTRYP PFNGLGETNAMEDFRAMEBUFFERATTACHMENTPARAMETERIVEXTPROC) (GLuint framebuffer, GLenum attachment, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLGENERATETEXTUREMIPMAPEXTPROC) (GLuint texture, GLenum target);
+typedef void (APIENTRYP PFNGLGENERATEMULTITEXMIPMAPEXTPROC) (GLenum texunit, GLenum target);
+typedef void (APIENTRYP PFNGLFRAMEBUFFERDRAWBUFFEREXTPROC) (GLuint framebuffer, GLenum mode);
+typedef void (APIENTRYP PFNGLFRAMEBUFFERDRAWBUFFERSEXTPROC) (GLuint framebuffer, GLsizei n, const GLenum *bufs);
+typedef void (APIENTRYP PFNGLFRAMEBUFFERREADBUFFEREXTPROC) (GLuint framebuffer, GLenum mode);
+typedef void (APIENTRYP PFNGLGETFRAMEBUFFERPARAMETERIVEXTPROC) (GLuint framebuffer, GLenum pname, GLint *params);
+typedef void (APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC) (GLuint renderbuffer, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height);
+typedef void (APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLECOVERAGEEXTPROC) (GLuint renderbuffer, GLsizei coverageSamples, GLsizei colorSamples, GLenum internalformat, GLsizei width, GLsizei height);
+typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTUREEXTPROC) (GLuint framebuffer, GLenum attachment, GLuint texture, GLint level);
+typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTURELAYEREXTPROC) (GLuint framebuffer, GLenum attachment, GLuint texture, GLint level, GLint layer);
+typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTUREFACEEXTPROC) (GLuint framebuffer, GLenum attachment, GLuint texture, GLint level, GLenum face);
+typedef void (APIENTRYP PFNGLTEXTURERENDERBUFFEREXTPROC) (GLuint texture, GLenum target, GLuint renderbuffer);
+typedef void (APIENTRYP PFNGLMULTITEXRENDERBUFFEREXTPROC) (GLenum texunit, GLenum target, GLuint renderbuffer);
+#endif
+
+#ifndef GL_EXT_vertex_array_bgra
+#define GL_EXT_vertex_array_bgra 1
+#endif
+
+#ifndef GL_EXT_texture_swizzle
+#define GL_EXT_texture_swizzle 1
+#endif
+
+#ifndef GL_NV_explicit_multisample
+#define GL_NV_explicit_multisample 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glGetMultisamplefvNV (GLenum, GLuint, GLfloat *);
+GLAPI void APIENTRY glSampleMaskIndexedNV (GLuint, GLbitfield);
+GLAPI void APIENTRY glTexRenderbufferNV (GLenum, GLuint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLGETMULTISAMPLEFVNVPROC) (GLenum pname, GLuint index, GLfloat *val);
+typedef void (APIENTRYP PFNGLSAMPLEMASKINDEXEDNVPROC) (GLuint index, GLbitfield mask);
+typedef void (APIENTRYP PFNGLTEXRENDERBUFFERNVPROC) (GLenum target, GLuint renderbuffer);
+#endif
+
+#ifndef GL_NV_transform_feedback2
+#define GL_NV_transform_feedback2 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBindTransformFeedbackNV (GLenum, GLuint);
+GLAPI void APIENTRY glDeleteTransformFeedbacksNV (GLsizei, const GLuint *);
+GLAPI void APIENTRY glGenTransformFeedbacksNV (GLsizei, GLuint *);
+GLAPI GLboolean APIENTRY glIsTransformFeedbackNV (GLuint);
+GLAPI void APIENTRY glPauseTransformFeedbackNV (void);
+GLAPI void APIENTRY glResumeTransformFeedbackNV (void);
+GLAPI void APIENTRY glDrawTransformFeedbackNV (GLenum, GLuint);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLBINDTRANSFORMFEEDBACKNVPROC) (GLenum target, GLuint id);
+typedef void (APIENTRYP PFNGLDELETETRANSFORMFEEDBACKSNVPROC) (GLsizei n, const GLuint *ids);
+typedef void (APIENTRYP PFNGLGENTRANSFORMFEEDBACKSNVPROC) (GLsizei n, GLuint *ids);
+typedef GLboolean (APIENTRYP PFNGLISTRANSFORMFEEDBACKNVPROC) (GLuint id);
+typedef void (APIENTRYP PFNGLPAUSETRANSFORMFEEDBACKNVPROC) (void);
+typedef void (APIENTRYP PFNGLRESUMETRANSFORMFEEDBACKNVPROC) (void);
+typedef void (APIENTRYP PFNGLDRAWTRANSFORMFEEDBACKNVPROC) (GLenum mode, GLuint id);
+#endif
+
+#ifndef GL_ATI_meminfo
+#define GL_ATI_meminfo 1
+#endif
+
+#ifndef GL_AMD_performance_monitor
+#define GL_AMD_performance_monitor 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glGetPerfMonitorGroupsAMD (GLint *, GLsizei, GLuint *);
+GLAPI void APIENTRY glGetPerfMonitorcountersAMD (GLuint, GLint *, GLint *, GLsizei, GLuint *);
+GLAPI void APIENTRY glGetPerfMonitorGroupStringAMD (GLuint, GLsizei, GLsizei *, GLchar *);
+GLAPI void APIENTRY glGetPerfMonitorcounterStringAMD (GLuint, GLuint, GLsizei, GLsizei *, GLchar *);
+GLAPI void APIENTRY glGetPerfMonitorcounterInfoAMD (GLuint, GLuint, GLenum, void *);
+GLAPI void APIENTRY glGenPerfMonitorsAMD (GLsizei, GLuint *);
+GLAPI void APIENTRY glDeletePerfMonitorsAMD (GLsizei, GLuint *);
+GLAPI void APIENTRY glSelectPerfMonitorcountersAMD (GLuint, GLboolean, GLuint, GLint, GLuint *);
+GLAPI void APIENTRY glBeginPerfMonitorAMD (GLuint);
+GLAPI void APIENTRY glEndPerfMonitorAMD (GLuint);
+GLAPI void APIENTRY glGetPerfMonitorcounterDataAMD (GLuint, GLenum, GLsizei, GLuint *, GLint *);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLGETPERFMONITORGROUPSAMDPROC) (GLint *numGroups, GLsizei groupsSize, GLuint *groups);
+typedef void (APIENTRYP PFNGLGETPERFMONITORCOUNTERSAMDPROC) (GLuint group, GLint *numcounters, GLint *maxActivecounters, GLsizei counterSize, GLuint *counters);
+typedef void (APIENTRYP PFNGLGETPERFMONITORGROUPSTRINGAMDPROC) (GLuint group, GLsizei bufSize, GLsizei *length, GLchar *groupString);
+typedef void (APIENTRYP PFNGLGETPERFMONITORCOUNTERSTRINGAMDPROC) (GLuint group, GLuint counter, GLsizei bufSize, GLsizei *length, GLchar *counterString);
+typedef void (APIENTRYP PFNGLGETPERFMONITORCOUNTERINFOAMDPROC) (GLuint group, GLuint counter, GLenum pname, void *data);
+typedef void (APIENTRYP PFNGLGENPERFMONITORSAMDPROC) (GLsizei n, GLuint *monitors);
+typedef void (APIENTRYP PFNGLDELETEPERFMONITORSAMDPROC) (GLsizei n, GLuint *monitors);
+typedef void (APIENTRYP PFNGLSELECTPERFMONITORCOUNTERSAMDPROC) (GLuint monitor, GLboolean enable, GLuint group, GLint numcounters, GLuint *counterList);
+typedef void (APIENTRYP PFNGLBEGINPERFMONITORAMDPROC) (GLuint monitor);
+typedef void (APIENTRYP PFNGLENDPERFMONITORAMDPROC) (GLuint monitor);
+typedef void (APIENTRYP PFNGLGETPERFMONITORCOUNTERDATAAMDPROC) (GLuint monitor, GLenum pname, GLsizei dataSize, GLuint *data, GLint *bytesWritten);
+#endif
+
+#ifndef GL_AMD_texture_texture4
+#define GL_AMD_texture_texture4 1
+#endif
+
+#ifndef GL_AMD_vertex_shader_tesselator
+#define GL_AMD_vertex_shader_tesselator 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glTessellationFactorAMD (GLfloat);
+GLAPI void APIENTRY glTessellationModeAMD (GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLTESSELLATIONFACTORAMDPROC) (GLfloat factor);
+typedef void (APIENTRYP PFNGLTESSELLATIONMODEAMDPROC) (GLenum mode);
+#endif
+
+#ifndef GL_EXT_provoking_vertex
+#define GL_EXT_provoking_vertex 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glProvokingVertexEXT (GLenum);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLPROVOKINGVERTEXEXTPROC) (GLenum mode);
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
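
The PFNGL*PROC typedefs above are consumed by run-time entry-point lookup: the address is queried from the GL implementation and cast to the matching typedef. A minimal sketch, assuming a current GLX context; the pointer variable name is illustrative and error handling is omitted:

    /* Resolve and call an extension entry point through its PFN typedef.
     * Assumes a current GLX context; does nothing if the extension is absent. */
    #include <GL/gl.h>
    #include <GL/glext.h>
    #include <GL/glx.h>

    static PFNGLFRAMETERMINATORGREMEDYPROC p_glFrameTerminatorGREMEDY;

    static void mark_frame_boundary(void)
    {
        if (!p_glFrameTerminatorGREMEDY)
            p_glFrameTerminatorGREMEDY = (PFNGLFRAMETERMINATORGREMEDYPROC)
                glXGetProcAddress((const GLubyte *) "glFrameTerminatorGREMEDY");
        if (p_glFrameTerminatorGREMEDY)
            p_glFrameTerminatorGREMEDY();  /* frame marker for GL debuggers */
    }
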
diff --git a/include/CL/opencl.h b/include/CL/opencl.h
new file mode 100644
index 0000000..938469c
--- /dev/null
+++ b/include/CL/opencl.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
+
+#ifndef __OPENCL_H
+#define __OPENCL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __APPLE__
+
+#include <OpenCL/cl.h>
+#include <OpenCL/cl_gl.h>
+#include <OpenCL/cl_gl_ext.h>
+#include <OpenCL/cl_ext.h>
+
+#else
+
+#include <CL/cl.h>
+#include <CL/cl_gl.h>
+#include <CL/cl_gl_ext.h>
+#include <CL/cl_ext.h>
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __OPENCL_H */
+
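The opencl.h file added above is only an umbrella header: the __APPLE__ branch pulls the headers from the OpenCL framework, the other branch from the usual CL/ include directory. A minimal host-side sketch that uses it, relying only on core OpenCL 1.x calls (error handling abbreviated):

    /* Minimal host program built on the umbrella header above. */
    #include <CL/opencl.h>
    #include <stdio.h>

    int main(void)
    {
        cl_platform_id platform;
        cl_uint n = 0;
        if (clGetPlatformIDs(1, &platform, &n) != CL_SUCCESS || n == 0) {
            fprintf(stderr, "no OpenCL platform found\n");
            return 1;
        }
        char name[256];
        clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(name), name, NULL);
        printf("platform: %s\n", name);
        return 0;
    }
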
diff --git a/kernels/compiler_argument_structure.cl b/kernels/compiler_argument_structure.cl
new file mode 100644
index 0000000..ab7896e
--- /dev/null
+++ b/kernels/compiler_argument_structure.cl
@@ -0,0 +1,9 @@
+struct hop { int x, y; };
+
+__kernel void
+compiler_argument_structure(__global int *dst, struct hop h)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = h.x + h.y;
+}
+
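The kernel above takes struct hop by value, so a host-side test has to declare a matching layout and hand it to clSetKernelArg directly. A hedged sketch, assuming the cl_kernel and destination cl_mem already exist (names are illustrative):

    /* Set the arguments of compiler_argument_structure.
     * 'kernel' and 'dst_buf' are assumed to be a valid cl_kernel / cl_mem. */
    struct hop { cl_int x, y; };   /* must match the kernel-side layout */

    static void set_args(cl_kernel kernel, cl_mem dst_buf)
    {
        struct hop h = { 3, 4 };
        clSetKernelArg(kernel, 0, sizeof(cl_mem), &dst_buf);
        clSetKernelArg(kernel, 1, sizeof(h), &h);   /* struct passed by value */
    }
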
diff --git a/kernels/compiler_argument_structure_indirect.cl b/kernels/compiler_argument_structure_indirect.cl
new file mode 100644
index 0000000..c4b062f
--- /dev/null
+++ b/kernels/compiler_argument_structure_indirect.cl
@@ -0,0 +1,9 @@
+struct hop { int x[16]; };
+
+__kernel void
+compiler_argument_structure(__global int *dst, struct hop h)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = h.x[get_local_id(0)];
+}
+
diff --git a/kernels/compiler_array.cl b/kernels/compiler_array.cl
new file mode 100644
index 0000000..5dce4d9
--- /dev/null
+++ b/kernels/compiler_array.cl
@@ -0,0 +1,14 @@
+__kernel void
+compiler_array(__global int *src, __global int *dst)
+{
+ int array[16];
+ int i;
+ for (i = 0; i < 16; ++i) {
+ if (src[0] > 10)
+ array[i] = get_local_id(0);
+ else
+ array[15 - i] = 3 + get_local_id(1);
+ }
+ dst[get_global_id(0)] = array[get_local_id(0)];
+}
+
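compiler_array.cl and the compiler_array*.cl files that follow stress private arrays and data-dependent control flow; a test would normally launch them over a small 1-D range. A minimal enqueue sketch, assuming a valid queue, kernel and buffers (all names are illustrative):

    /* Launch one of the compiler_array* kernels over 16 work items.
     * 'queue', 'kernel', 'src_buf' and 'dst_buf' are assumed to be valid. */
    static void run_array_kernel(cl_command_queue queue, cl_kernel kernel,
                                 cl_mem src_buf, cl_mem dst_buf)
    {
        size_t global = 16, local = 16;
        clSetKernelArg(kernel, 0, sizeof(cl_mem), &src_buf);
        clSetKernelArg(kernel, 1, sizeof(cl_mem), &dst_buf);
        clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local,
                               0, NULL, NULL);
        clFinish(queue);
    }
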
diff --git a/kernels/compiler_array0.cl b/kernels/compiler_array0.cl
new file mode 100644
index 0000000..3ab0fb8
--- /dev/null
+++ b/kernels/compiler_array0.cl
@@ -0,0 +1,16 @@
+__kernel void
+compiler_array0(__global int *src, __global int *dst)
+{
+ int i;
+ int final[16];
+ for (i = 0; i < 16; ++i) {
+ int array[16], j;
+ for (j = 0; j < 16; ++j)
+ array[j] = get_global_id(0);
+ for (j = 0; j < src[0]; ++j)
+ array[j] = 1+src[j];
+ final[i] = array[i];
+ }
+ dst[get_global_id(0)] = final[get_global_id(0)];
+}
+
diff --git a/kernels/compiler_array1.cl b/kernels/compiler_array1.cl
new file mode 100644
index 0000000..ad567c2
--- /dev/null
+++ b/kernels/compiler_array1.cl
@@ -0,0 +1,15 @@
+__kernel void
+compiler_array1(__global int *src, __global int *dst)
+{
+ int final[16];
+ for (int i = 0; i < 16; ++i) {
+ int array[16];
+ for (int j = 0; j < src[0]; ++j)
+ array[j] = 1+src[0];
+ for (int j = src[0]; j < 16; ++j)
+ array[j] = get_global_id(0);
+ final[i] = array[i];
+ }
+ dst[get_global_id(0)] = final[get_global_id(0)];
+}
+
diff --git a/kernels/compiler_array2.cl b/kernels/compiler_array2.cl
new file mode 100644
index 0000000..ae73932
--- /dev/null
+++ b/kernels/compiler_array2.cl
@@ -0,0 +1,13 @@
+__kernel void
+compiler_array2(__global int *src, __global int *dst)
+{
+ int final[16];
+ int array[16];
+ for (int j = 0; j < 16; ++j) array[j] = j;
+ for (int j = 0; j < 16; ++j) final[j] = j+1;
+ if (get_global_id(0) == 15)
+ dst[get_global_id(0)] = final[get_global_id(0)];
+ else
+ dst[get_global_id(0)] = array[15 - get_global_id(0)];
+}
+
diff --git a/kernels/compiler_array3.cl b/kernels/compiler_array3.cl
new file mode 100644
index 0000000..152c22a
--- /dev/null
+++ b/kernels/compiler_array3.cl
@@ -0,0 +1,14 @@
+__kernel void
+compiler_array3(__global int *src, __global int *dst)
+{
+ int tmp[32];
+ for (int i = 0; i < 16; ++i) {
+ for (int j = 0; j < 16; ++j)
+ tmp[j] = get_global_id(0);
+ for (int j = 0; j < src[0]; ++j)
+ tmp[j] = 1+src[j];
+ tmp[16+i] = tmp[i];
+ }
+ dst[get_global_id(0)] = tmp[16+get_global_id(0)];
+}
+
diff --git a/kernels/compiler_box_blur.cl b/kernels/compiler_box_blur.cl
new file mode 100644
index 0000000..0c6b657
--- /dev/null
+++ b/kernels/compiler_box_blur.cl
@@ -0,0 +1,113 @@
+inline float3 unpack_fp3(uint u) {
+ float3 u3;
+ u3.x = (float) (u & 0xff); u >>= 8;
+ u3.y = (float) (u & 0xff); u >>= 8;
+ u3.z = (float) (u & 0xff);
+ return u3;
+}
+
+inline uint pack_fp3(float3 u3) {
+ uint u;
+ u = (((uint) u3.x)) | (((uint) u3.y) << 8) | (((uint) u3.z) << 16);
+ return u;
+}
+
+#define HFILTER3(C0, C1, C2, C3, CURR, LEFT, RIGHT)\
+ float3 C0, C1, C2, C3;\
+ do {\
+ const uint4 from = vload4(CURR, src);\
+ const float3 from0 = unpack_fp3(from.x);\
+ const float3 from1 = unpack_fp3(from.y);\
+ const float3 from2 = unpack_fp3(from.z);\
+ const float3 from3 = unpack_fp3(from.w);\
+ const float3 l = unpack_fp3(src[LEFT]);\
+ const float3 r = unpack_fp3(src[RIGHT]);\
+ C0 = (l+from0+from1);\
+ C1 = (from0+from1+from2);\
+ C2 = (from1+from2+from3);\
+ C3 = (from2+from3+r);\
+ } while(0)
+#if 1
+__kernel void compiler_box_blur(__global const uint *src,
+ __global uint *dst,
+ int w,
+ int h,
+ int chunk)
+{
+ const int x = get_global_id(0);
+ int y = get_global_id(1)*chunk;
+ const int yend = min(y + chunk, h); /* we process a tile in the image */
+
+ /* Current line (left (1 pixel), center (4 pixels), right (1 pixel)) */
+ const int left = max(4*x-1 + y*w, y*w);
+ const int right = min(4*x+4 + y*w, y*w+w-1);
+ int curr = x + y*(w>>2);
+ HFILTER3(curr0, curr1, curr2, curr3, curr, left, right);
+
+ /* Top line (left (1 pixel), center (4 pixels), right (1 pixel)) */
+ const int ytop = max(y-1,0);
+ const int topLeft = max(4*x-1 + ytop*w, ytop*w);
+ const int topRight = min(4*x+4 + ytop*w, ytop*w+w-1);
+ const int top = x + ytop*(w>>2);
+ HFILTER3(top0, top1, top2, top3, top, topLeft, topRight);
+
+ /* To guard bottom line */
+ const int maxBottom = x + (h-1)*(w>>2);
+ const int maxBottomLeft = max(x-1,0) + (h-1)*w;
+ const int maxBottomRight = min(x+1,w-1) + (h-1)*w;
+
+ /* We use a short 3 pixel sliding window */
+ const int ybottom = min(y+1,h-1);
+ int bottomLeft = max(4*x-1 + ybottom*w, ybottom*w);
+ int bottomRight = min(4*x+4 + ybottom*w, ybottom*w+w-1);
+ int bottom = x + ybottom*(w>>2);
+
+ /* Top down sliding window */
+ for (; y < yend; ++y, curr += (w>>2), bottom += (w>>2), bottomLeft += w, bottomRight += w) {
+ const int center = min(bottom, maxBottom);
+ const int left = min(bottomLeft, maxBottomLeft);
+ const int right = min(bottomRight, maxBottomRight);
+ HFILTER3(bottom0, bottom1, bottom2, bottom3, center, left, right);
+ const float3 to0 = (top0+curr0+bottom0)*(1.f/9.f);
+ const float3 to1 = (top1+curr1+bottom1)*(1.f/9.f);
+ const float3 to2 = (top2+curr2+bottom2)*(1.f/9.f);
+ const float3 to3 = (top3+curr3+bottom3)*(1.f/9.f);
+ const uint4 to = (uint4)(pack_fp3(to0),pack_fp3(to1),pack_fp3(to2),pack_fp3(to3));
+ vstore4(to, curr, dst);
+ top0 = curr0; top1 = curr1; top2 = curr2; top3 = curr3;
+ curr0 = bottom0; curr1 = bottom1; curr2 = bottom2; curr3 = bottom3;
+ }
+}
+#else
+
+__kernel void compiler_box_blur(__global const uint *src,
+ __global uint *dst,
+ int w,
+ int h,
+ int chunk)
+{
+ const int x = get_global_id(0);
+ int y = 0;
+ const int yend = min(y + 64, h); /* we process a tile in the image */
+
+ /* Current line (left (1 pixel), center (4 pixels), right (1 pixel)) */
+ int curr = x + y*32;
+
+ /* Top down sliding window */
+ for (; y < yend; ++y, curr += (w>>2)) {
+ float3 d = (float3)(255.f,255.f,255.f);
+ const uint4 to = (uint4)(pack_fp3(d),pack_fp3(d),pack_fp3(d),pack_fp3(d));
+#if 0
+ dst[4*curr+0] = (int)dst;
+ dst[4*curr+1] = (int)dst;
+ dst[4*curr+2] = (int)dst;
+ dst[4*curr+3] = (int)dst;
+#endif
+ dst[4*curr+0] = to.x;
+ dst[4*curr+1] = to.y;
+ dst[4*curr+2] = to.z;
+ dst[4*curr+3] = to.w;
+ }
+}
+#endif
+
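
The blur kernel stores each pixel as three 8-bit channels packed into the low 24 bits of a uint; unpack_fp3/pack_fp3 convert between that layout and float3 so three neighbouring rows can be summed. A plain-C reference of the same packing, useful when checking dst on the host (illustration only):

    #include <stdint.h>

    /* Channel layout as in unpack_fp3: bits 0-7, 8-15 and 16-23. */
    static void unpack_rgb(uint32_t u, float c[3])
    {
      c[0] = (float)(u & 0xff);
      c[1] = (float)((u >> 8) & 0xff);
      c[2] = (float)((u >> 16) & 0xff);
    }

    static uint32_t pack_rgb(const float c[3])
    {
      return ((uint32_t)c[0]) | ((uint32_t)c[1] << 8) | ((uint32_t)c[2] << 16);
    }
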
diff --git a/kernels/compiler_box_blur_float.cl b/kernels/compiler_box_blur_float.cl
new file mode 100644
index 0000000..6f4e1b9
--- /dev/null
+++ b/kernels/compiler_box_blur_float.cl
@@ -0,0 +1,48 @@
+__kernel void compiler_box_blur_float(__global const float4 *src,
+ __global float4 *dst,
+ int w,
+ int h,
+ int chunk)
+{
+ const int x = get_global_id(0);
+ int y = get_global_id(1)*chunk;
+ const int yend = min(y+chunk, h); /* we process a tile in the image */
+
+ /* Current line (left (1 pixel), center (4 pixels), right (1 pixel)) */
+ const int left = max(x-1,0) + y*w;
+ const int right = min(x+1,w-1) + y*w;
+ int curr = x + y*w;
+ float4 currPixel = src[left] + src[curr] + src[right];
+
+ /* Top line (left (1 pixel), center (4 pixels), right (1 pixel)) */
+ const int ytop = max(y-1,0);
+ const int topLeft = max(x-1,0) + ytop*w;
+ const int topRight = min(x+1,w-1) + ytop*w;
+ const int top = x + ytop*w;
+ float4 topPixel = src[topLeft] + src[top] + src[topRight];
+
+ /* To guard bottom line */
+ const int maxBottom = x + (h-1)*w;
+ const int maxBottomLeft = max(x-1,0) + (h-1)*w;
+ const int maxBottomRight = min(x+1,w-1) + (h-1)*w;
+
+ /* We use a short 4 pixel sliding window */
+ const int ybottom = min(y+1,h-1);
+ int bottomLeft = max(x-1 + ybottom*w, ybottom*w);
+ int bottomRight = min(x+1 + ybottom*w, ybottom*w+w-1);
+ int bottom = x + ybottom*w;
+
+
+ /* Top down sliding window */
+ for (; y < yend; ++y, curr += w, bottom += w, bottomLeft += w, bottomRight += w) {
+ const int center = min(bottom, maxBottom);
+ const int left = min(bottomLeft, maxBottomLeft);
+ const int right = min(bottomRight, maxBottomRight);
+ const float4 bottomPixel = src[left] + src[center] + src[right];
+ const float4 to = (bottomPixel + currPixel + topPixel) * (1.f/9.f);
+ dst[curr] = to;
+ topPixel = currPixel;
+ currPixel = bottomPixel;
+ }
+}
+
diff --git a/kernels/compiler_box_blur_float_ref.bmp b/kernels/compiler_box_blur_float_ref.bmp
new file mode 100644
index 0000000..149cbba
Binary files /dev/null and b/kernels/compiler_box_blur_float_ref.bmp differ
diff --git a/kernels/compiler_box_blur_ref.bmp b/kernels/compiler_box_blur_ref.bmp
new file mode 100644
index 0000000..fd91008
Binary files /dev/null and b/kernels/compiler_box_blur_ref.bmp differ
diff --git a/kernels/compiler_byte_scatter.cl b/kernels/compiler_byte_scatter.cl
new file mode 100644
index 0000000..ab56ba8
--- /dev/null
+++ b/kernels/compiler_byte_scatter.cl
@@ -0,0 +1,7 @@
+__kernel void
+compiler_byte_scatter(__global char *dst)
+{
+ int id = (int) get_global_id(0);
+ dst[id] = (char) id;
+}
+
diff --git a/kernels/compiler_chocolux.cl b/kernels/compiler_chocolux.cl
new file mode 100644
index 0000000..218f65d
--- /dev/null
+++ b/kernels/compiler_chocolux.cl
@@ -0,0 +1,64 @@
+typedef float2 vec2;
+typedef float3 vec3;
+typedef float4 vec4;
+
+#define sin native_sin
+#define cos native_cos
+#define tan native_tan
+#define normalize fast_normalize
+#define length fast_length
+#define mod fmod
+#define time 10.f
+
+inline vec3 reflect(vec3 I, vec3 N) {
+ return I - 2.0f * dot(N, I) * N;
+}
+
+inline uint pack_fp4(float4 u4) {
+ uint u;
+ u = (((uint) u4.x)) |
+ (((uint) u4.y) << 8) |
+ (((uint) u4.z) << 16);
+ return u;
+}
+
+#define OUTPUT do {\
+ const vec4 final = 255.f * max(min(gl_FragColor, (vec4)(1.f)), (vec4)(0.f)); \
+ dst[get_global_id(0) + get_global_id(1) * w] = pack_fp4(final); \
+} while (0)
+
+__kernel void compiler_chocolux(__global uint *dst, float resx, float resy, int w)
+{
+ vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1));
+ vec3 s[4];
+ s[0]=(vec3)(0);
+ s[3]=(vec3)(sin(time),cos(time),0);
+ s[1]=s[3].zxy;
+ s[2]=s[3].zzx;
+
+ float t,b,c,h=0.0f;
+ vec3 m,n;
+ vec3 p=(vec3)(.2f);
+ vec3 d=normalize(.001f*(vec3)(gl_FragCoord,.0f)-p);
+
+ for(int i=0;i<4;i++)
+ {
+ t=2.0f;
+ for(int i=0;i<4;i++)
+ {
+ b=dot(d,n=s[i]-p);
+ c=b*b+.2f-dot(n,n);
+ if(b-c<t)
+ if(c>0.0f)
+ {
+ m=s[i];t=b-c;
+ }
+ }
+ p+=t*d;
+ d=reflect(d,n=normalize(p-m));
+ h+=pow(n.x*n.x,44.f)+n.x*n.x*.2f;
+ }
+ vec4 gl_FragColor=(vec4)(h,h*h,h*h*h*h,1.f);
+ OUTPUT;
+}
+
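
This kernel (and the clod, julia, menger sponge, nautilus and ribbon kernels below, which share the same dst/resx/resy/w signature) is a GLSL fragment-shader port: each work-item shades one pixel and writes a packed RGBX uint at dst[x + y*w]. A hedged launch sketch; the destination buffer must hold width*height uints, and the actual sizes live in the test harness, not in this file:

    #include <CL/cl.h>

    static cl_int launch_shader_kernel(cl_command_queue queue, cl_kernel kernel,
                                       cl_mem dst, int width, int height)
    {
      cl_int w = width;
      float resx = (float)width, resy = (float)height;
      size_t global[2] = { (size_t)width, (size_t)height };

      clSetKernelArg(kernel, 0, sizeof(cl_mem), &dst);
      clSetKernelArg(kernel, 1, sizeof(float), &resx);
      clSetKernelArg(kernel, 2, sizeof(float), &resy);
      clSetKernelArg(kernel, 3, sizeof(cl_int), &w);
      return clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, NULL,
                                    0, NULL, NULL);
    }
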
diff --git a/kernels/compiler_chocolux_ref.bmp b/kernels/compiler_chocolux_ref.bmp
new file mode 100644
index 0000000..e51a4a7
Binary files /dev/null and b/kernels/compiler_chocolux_ref.bmp differ
diff --git a/kernels/compiler_clod.cl b/kernels/compiler_clod.cl
new file mode 100644
index 0000000..e21d9f5
--- /dev/null
+++ b/kernels/compiler_clod.cl
@@ -0,0 +1,91 @@
+typedef float2 vec2;
+typedef float3 vec3;
+typedef float4 vec4;
+
+#define sin native_sin
+#define cos native_cos
+#define tan native_tan
+#define normalize fast_normalize
+#define length fast_length
+#define mod fmod
+
+inline vec3 reflect(vec3 I, vec3 N) {
+ return I - 2.0f * dot(N, I) * N;
+}
+
+inline uint pack_fp4(float4 u4) {
+ uint u;
+ u = (((uint) u4.x)) |
+ (((uint) u4.y) << 8) |
+ (((uint) u4.z) << 16);
+ return u;
+}
+
+#define OUTPUT do {\
+ const vec4 final = 255.f * max(min(gl_FragColor, (vec4)(1.f)), (vec4)(0.f)); \
+ dst[get_global_id(0) + get_global_id(1) * w] = pack_fp4(final); \
+} while (0)
+
+#define time 1.f
+
+float f(vec3 o)
+{
+ float a=(sin(o.x)+o.y*.25f)*.35f;
+ o=(vec3)(cos(a)*o.x-sin(a)*o.y,sin(a)*o.x+cos(a)*o.y,o.z);
+ return dot(cos(o)*cos(o),(vec3)(1.f))-1.2f;
+}
+
+// XXX front end does not inline this function
+__attribute((always_inline)) vec3 s(vec3 o,vec3 d)
+{
+ float t=0.0f;
+ float dt = 0.2f;
+ float nh = 0.0f;
+ float lh = 0.0f;
+ for(int i=0;i<50;i++)
+ {
+ nh = f(o+d*t);
+ if(nh>0.0f) { lh=nh; t+=dt; }
+ }
+
+ if( nh>0.0f ) return (vec3)(.93f,.94f,.85f);
+
+ t = t - dt*nh/(nh-lh);
+
+ vec3 exyy=(vec3)(0.1f,0.0f,0.0f);
+ vec3 eyxy=(vec3)(0.0f,0.1f,0.0f);
+ vec3 eyyx=(vec3)(0.0f,0.0f,0.1f);
+ vec3 p=o+d*t;
+ vec3 n=-normalize((vec3)(f(p+exyy),f(p+eyxy),f(p+eyyx))+(vec3)((sin(p*75.f)))*.01f);
+
+ return (vec3)(mix( ((max(-dot(n,(vec3)(.577f)),0.f) + 0.125f*max(-dot(n,(vec3)(-.707f,-.707f,0.f)),0.f)))*(mod
+ (length(p.xy)*20.f,2.f)<1.0f?(vec3)(.71f,.85f,.25f):(vec3)(.79f,.93f,.4f))
+ ,(vec3)(.93f,.94f,.85f), (vec3)(pow(t/9.f,5.f)) ) );
+}
+
+#if 0
+// XXX vector types in function arguments are not supported yet
+__kernel void compiler_clod(__global uint *dst, vec2 resolution, int w)
+{
+ vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1));
+ //vec2 p = -1.0f + 2.0f * gl_FragCoord.xy / resolution.xy;
+ vec2 p;
+ p.x = -1.0f + 2.0f * gl_FragCoord.x / resolution.x;
+ p.y = -1.0f + 2.0f * gl_FragCoord.y / resolution.y;
+ vec4 gl_FragColor=(vec4)(s((vec3)(sin(time*1.5f)*.5f,cos(time)*.5f,time), normalize((vec3)(p.xy,1.0f))),1.0f);
+ OUTPUT;
+}
+#else
+__kernel void compiler_clod(__global uint *dst, float resx, float resy, int w)
+{
+ vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1));
+ //vec2 p = -1.0f + 2.0f * gl_FragCoord.xy / resolution.xy;
+ vec2 p;
+ p.x = -1.0f + 2.0f * gl_FragCoord.x / resx;
+ p.y = -1.0f + 2.0f * gl_FragCoord.y / resy;
+ vec4 gl_FragColor=(vec4)(s((vec3)(sin(time*1.5f)*.5f,cos(time)*.5f,time), normalize((vec3)(p.xy,1.0f))),1.0f);
+ OUTPUT;
+}
+
+#endif
+
diff --git a/kernels/compiler_clod_ref.bmp b/kernels/compiler_clod_ref.bmp
new file mode 100644
index 0000000..71afda9
Binary files /dev/null and b/kernels/compiler_clod_ref.bmp differ
diff --git a/kernels/compiler_function_argument.cl b/kernels/compiler_function_argument.cl
new file mode 100644
index 0000000..fe6de28
--- /dev/null
+++ b/kernels/compiler_function_argument.cl
@@ -0,0 +1,7 @@
+__kernel void
+compiler_function_argument(__global int *dst, int value)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = value;
+}
+
diff --git a/kernels/compiler_function_argument0.cl b/kernels/compiler_function_argument0.cl
new file mode 100644
index 0000000..6bc2e92
--- /dev/null
+++ b/kernels/compiler_function_argument0.cl
@@ -0,0 +1,7 @@
+__kernel void
+compiler_function_argument0(__global int *dst, short value)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = value;
+}
+
diff --git a/kernels/compiler_function_argument1.cl b/kernels/compiler_function_argument1.cl
new file mode 100644
index 0000000..8842b0b
--- /dev/null
+++ b/kernels/compiler_function_argument1.cl
@@ -0,0 +1,7 @@
+__kernel void
+compiler_function_argument1(__global int *dst, char value, short value0, int value1)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = value + value0 + value1;
+}
+
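
Scalar kernel arguments are also set with clSetKernelArg; the size passed must match the kernel-side type. A hedged sketch for the kernel above (argument values are arbitrary):

    #include <CL/cl.h>

    static void set_scalar_args(cl_kernel kernel, cl_mem dst_mem)
    {
      cl_char  value  = 1;
      cl_short value0 = 2;
      cl_int   value1 = 3;
      clSetKernelArg(kernel, 0, sizeof(cl_mem),   &dst_mem);
      clSetKernelArg(kernel, 1, sizeof(cl_char),  &value);
      clSetKernelArg(kernel, 2, sizeof(cl_short), &value0);
      clSetKernelArg(kernel, 3, sizeof(cl_int),   &value1);
    }
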
diff --git a/kernels/compiler_gather_register_file.cl b/kernels/compiler_gather_register_file.cl
new file mode 100644
index 0000000..773797d
--- /dev/null
+++ b/kernels/compiler_gather_register_file.cl
@@ -0,0 +1,10 @@
+__kernel void
+compiler_gather_register_file(__global uint *src, __global uint *dst)
+{
+ __gen_ocl_force_simd16();
+ int id = (int)get_global_id(0);
+ const int x0 = src[id];
+ const unsigned short index = get_global_id(0);
+ dst[id] = __gen_ocl_rgather(index, x0);
+}
+
diff --git a/kernels/compiler_gather_register_file0.cl b/kernels/compiler_gather_register_file0.cl
new file mode 100644
index 0000000..0e6d487
--- /dev/null
+++ b/kernels/compiler_gather_register_file0.cl
@@ -0,0 +1,10 @@
+__kernel void
+compiler_gather_register_file0(__global uint *src, __global uint *dst)
+{
+ __gen_ocl_force_simd16();
+ int id = (int)get_global_id(0);
+ const int x0 = src[id];
+ const unsigned short index = 15 - get_global_id(0);
+ dst[id] = __gen_ocl_rgather(index, x0);
+}
+
diff --git a/kernels/compiler_gather_register_file1.cl b/kernels/compiler_gather_register_file1.cl
new file mode 100644
index 0000000..184202c
--- /dev/null
+++ b/kernels/compiler_gather_register_file1.cl
@@ -0,0 +1,11 @@
+__kernel void
+compiler_gather_register_file1(__global uint *src, __global uint *dst)
+{
+ __gen_ocl_force_simd16();
+ int id = (int)get_global_id(0);
+ const int x0 = src[id];
+ const int x1 = src[id+16];
+ const unsigned short index = 2*get_global_id(0);
+ dst[id] = __gen_ocl_rgather(index, x0, x1);
+}
+
diff --git a/kernels/compiler_if_else.cl b/kernels/compiler_if_else.cl
new file mode 100644
index 0000000..7ae8f99
--- /dev/null
+++ b/kernels/compiler_if_else.cl
@@ -0,0 +1,14 @@
+__kernel void
+compiler_if_else(__global int *src, __global int *dst)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = src[id];
+ if (dst[id] >= 0) {
+ dst[id] = src[id+1];
+ src[id] = 1;
+ } else {
+ dst[id]--;
+ src[id] = 2;
+ }
+}
+
diff --git a/kernels/compiler_insert_to_constant.cl b/kernels/compiler_insert_to_constant.cl
new file mode 100644
index 0000000..f94c5c3
--- /dev/null
+++ b/kernels/compiler_insert_to_constant.cl
@@ -0,0 +1,6 @@
+__kernel void compiler_insert_to_constant(__global int4 *dst) {
+ int4 value = (int4)(0,1,2,3);
+ value.z = get_global_id(0);
+ dst[get_global_id(0)] = value;
+}
+
diff --git a/kernels/compiler_insn_selection_masked_min_max.cl b/kernels/compiler_insn_selection_masked_min_max.cl
new file mode 100644
index 0000000..5b4be57
--- /dev/null
+++ b/kernels/compiler_insn_selection_masked_min_max.cl
@@ -0,0 +1,11 @@
+__kernel void
+compiler_insn_selection_masked_min_max(__global float* src, __global float* dst)
+{
+ int id = (int)get_global_id(0);
+ if (get_local_id(0) > 5)
+ dst[id] = max(src[id], src[7]);
+ else
+ dst[id] = min(src[id], src[10]);
+}
+
+
diff --git a/kernels/compiler_insn_selection_max.cl b/kernels/compiler_insn_selection_max.cl
new file mode 100644
index 0000000..762de2b
--- /dev/null
+++ b/kernels/compiler_insn_selection_max.cl
@@ -0,0 +1,7 @@
+__kernel void
+compiler_insn_selection_max(__global float* src, __global float* dst)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = max(src[id], src[0]);
+}
+
diff --git a/kernels/compiler_insn_selection_min.cl b/kernels/compiler_insn_selection_min.cl
new file mode 100644
index 0000000..6800eaf
--- /dev/null
+++ b/kernels/compiler_insn_selection_min.cl
@@ -0,0 +1,7 @@
+__kernel void
+compiler_insn_selection_min(__global float* src, __global float* dst)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = min(src[id], src[0]);
+}
+
diff --git a/kernels/compiler_julia.cl b/kernels/compiler_julia.cl
new file mode 100644
index 0000000..996c0b7
--- /dev/null
+++ b/kernels/compiler_julia.cl
@@ -0,0 +1,146 @@
+typedef float2 vec2;
+typedef float3 vec3;
+typedef float4 vec4;
+
+#define sin native_sin
+#define cos native_cos
+#define tan native_tan
+#define normalize fast_normalize
+#define length fast_length
+#define mod fmod
+#define time 1.f
+
+inline vec3 reflect(vec3 I, vec3 N) {
+ return I - 2.0f * dot(N, I) * N;
+}
+
+inline float clamp(float x, float m, float M) { return max(min(x,M),m); }
+
+inline uint pack_fp4(float4 u4) {
+ uint u;
+ u = (((uint) u4.x)) |
+ (((uint) u4.y) << 8) |
+ (((uint) u4.z) << 16);
+ return u;
+}
+
+#define OUTPUT do {\
+ const vec4 final = 255.f * max(min(gl_FragColor, (vec4)(1.f)), (vec4)(0.f)); \
+ dst[get_global_id(0) + get_global_id(1) * w] = pack_fp4(final); \
+} while (0)
+
+__attribute__((always_inline))
+float jinteresct(vec3 rO, vec3 rD, vec4 c, float *ao)
+{
+ float mz2,md2,dist,t;
+ float res=1000.0f;
+ vec4 z,nz;
+ int update_ao = 1;
+ *ao = 0.0f;
+ for(t=0.0f;t<6.0f;t+=dist)
+ {
+ if (update_ao) *ao += 1.0f;
+ vec3 p=rO+t*rD;
+
+ // calc distance
+ z=(vec4)(p,(c.y+c.x)*.3f);
+ md2=1.0f;
+ mz2=dot(z,z);
+
+ for(int i=0;i<9;i++)
+ {
+ // |dz|^2 -> 4*|dz|^2
+ //if (mz2 <= 4.0f)
+ {
+ md2*=4.0f*mz2;
+ // z -> z2 + c
+ nz.x=z.x*z.x-dot(z.yzw,z.yzw);
+ nz.yzw=2.0f*z.x*z.yzw;
+ z=nz+c;
+ mz2=dot(z,z);
+ }
+ if(mz2>4.0f)
+ break;
+ }
+
+ dist=0.25f*sqrt(mz2/md2)*log(mz2);
+ if(dist<0.0005f)
+ {
+ res=t;
+ break;
+ }
+ t+= dist;
+ }
+
+ return res;
+}
+
+#if 1
+__attribute__((always_inline))
+vec3 calcNormal(vec3 p, vec4 c)
+{
+ vec4 nz,ndz,dz[4];
+
+ vec4 z=(vec4)(p,(c.y+c.x)*.3f);
+
+ dz[0]=(vec4)(1.0f,0.0f,0.0f,0.0f);
+ dz[1]=(vec4)(0.0f,1.0f,0.0f,0.0f);
+ dz[2]=(vec4)(0.0f,0.0f,1.0f,0.0f);
+ //dz[3]=(vec4)(0.0f,0.0f,0.0f,1.0f);
+
+ for(int i=0;i<9;i++)
+ {
+ vec4 mz = (vec4)(z.x,-z.y,-z.z,-z.w);
+ // derivative
+ dz[0]=(vec4)(dot(mz,dz[0]),z.x*dz[0].yzw+dz[0].x*z.yzw);
+ dz[1]=(vec4)(dot(mz,dz[1]),z.x*dz[1].yzw+dz[1].x*z.yzw);
+ dz[2]=(vec4)(dot(mz,dz[2]),z.x*dz[2].yzw+dz[2].x*z.yzw);
+ //dz[3]=(vec4)(dot(mz,dz[3]),z.x*dz[3].yzw+dz[3].x*z.yzw);
+
+ // z = z2 + c
+ nz.x=dot(z, mz);
+ nz.yzw=2.0f*z.x*z.yzw;
+ z=nz+c;
+
+ if(dot(z,z)>4.0f)
+ break;
+ }
+
+ return normalize((vec3)(dot(z,dz[0]),dot(z,dz[1]),dot(z,dz[2])));
+}
+#endif
+
+__kernel void compiler_julia(__global uint *dst, float resx, float resy, int w)
+{
+ vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1));
+ vec2 p=-1.0f+2.0f*gl_FragCoord.xy/(vec2)(resx,resy);
+ vec3 color = (vec3)(0.0f);
+ vec4 cccc = (vec4)( .7f*cos(.5f*time), .7f*sin(.3f*time), .7f*cos(1.0f*time), 0.0f );
+ vec3 edir = normalize((vec3)(p,1.0f));
+ vec3 wori = (vec3)(0.0f,0.0f,-2.0f);
+
+ float ao;
+ float t = jinteresct(wori,edir,cccc,&ao);
+ if(t<100.0f)
+ {
+#if 1
+ vec3 inter = wori + t*edir;
+ vec3 nor = calcNormal(inter,cccc);
+
+ float dif = .5f + .5f*dot( nor, (vec3)(0.57703f) );
+ ao = max( 1.0f-ao*0.005f, 0.0f);
+
+ color = (vec3)(1.0f,.9f,.5f)*dif*ao + .5f*(vec3)(.6f,.7f,.8f)*ao;
+#else
+ color = (vec3)(0.5f,0.0f,0.0f);
+#endif
+ }
+ else
+ {
+ color = (vec3)(0.5f,0.51f,0.52f)+(vec3)(0.5f,0.47f,0.45f)*p.y;
+ }
+
+ vec4 gl_FragColor = (vec4)(color,1.0f);
+ OUTPUT;
+}
+
diff --git a/kernels/compiler_julia_no_break.cl b/kernels/compiler_julia_no_break.cl
new file mode 100644
index 0000000..c0bd3b1
--- /dev/null
+++ b/kernels/compiler_julia_no_break.cl
@@ -0,0 +1,147 @@
+typedef float2 vec2;
+typedef float3 vec3;
+typedef float4 vec4;
+
+#define sin native_sin
+#define cos native_cos
+#define tan native_tan
+#define normalize fast_normalize
+#define length fast_length
+#define mod fmod
+#define time 1.f
+
+inline vec3 reflect(vec3 I, vec3 N) {
+ return I - 2.0f * dot(N, I) * N;
+}
+
+inline float clamp(float x, float m, float M) { return max(min(x,M),m); }
+
+inline uint pack_fp4(float4 u4) {
+ uint u;
+ u = (((uint) u4.x)) |
+ (((uint) u4.y) << 8) |
+ (((uint) u4.z) << 16);
+ return u;
+}
+
+#define OUTPUT do {\
+ const vec4 final = 255.f * max(min(gl_FragColor, (vec4)(1.f)), (vec4)(0.f)); \
+ dst[get_global_id(0) + get_global_id(1) * w] = pack_fp4(final); \
+} while (0)
+
+__attribute__((always_inline))
+float jinteresct(vec3 rO, vec3 rD, vec4 c, float *ao)
+{
+ float mz2,md2,dist,t;
+ float res=1000.0f;
+ vec4 z,nz;
+ int update_ao = 1;
+ *ao = 0.0f;
+ t = 0.f;
+ for (int j = 0; j < 100; ++j)
+ {
+ if (update_ao) *ao += 1.0f;
+ vec3 p=rO+t*rD;
+
+ // calc distance
+ z=(vec4)(p,(c.y+c.x)*.3f);
+ md2=1.0f;
+ mz2=dot(z,z);
+
+ for(int i=0;i<9;i++)
+ {
+ // |dz|^2 -> 4*|dz|^2
+ //if (mz2 <= 4.0f)
+ {
+ md2*=4.0f*mz2;
+ // z -> z2 + c
+ nz.x=z.x*z.x-dot(z.yzw,z.yzw);
+ nz.yzw=2.0f*z.x*z.yzw;
+ z=nz+c;
+ mz2=dot(z,z);
+ }
+ if(mz2>4.0f)
+ break;
+ }
+
+ dist=0.25f*sqrt(mz2/md2)*log(mz2);
+ if(dist<0.0005f)
+ {
+ res=t;
+ update_ao = 0;
+ }
+ t+= dist;
+ }
+
+ return res;
+}
+
+#if 1
+__attribute__((always_inline))
+vec3 calcNormal(vec3 p, vec4 c)
+{
+ vec4 nz,ndz,dz[4];
+
+ vec4 z=(vec4)(p,(c.y+c.x)*.3f);
+
+ dz[0]=(vec4)(1.0f,0.0f,0.0f,0.0f);
+ dz[1]=(vec4)(0.0f,1.0f,0.0f,0.0f);
+ dz[2]=(vec4)(0.0f,0.0f,1.0f,0.0f);
+ //dz[3]=(vec4)(0.0f,0.0f,0.0f,1.0f);
+
+ for(int i=0;i<9;i++)
+ {
+ vec4 mz = (vec4)(z.x,-z.y,-z.z,-z.w);
+ // derivative
+ dz[0]=(vec4)(dot(mz,dz[0]),z.x*dz[0].yzw+dz[0].x*z.yzw);
+ dz[1]=(vec4)(dot(mz,dz[1]),z.x*dz[1].yzw+dz[1].x*z.yzw);
+ dz[2]=(vec4)(dot(mz,dz[2]),z.x*dz[2].yzw+dz[2].x*z.yzw);
+ //dz[3]=(vec4)(dot(mz,dz[3]),z.x*dz[3].yzw+dz[3].x*z.yzw);
+
+ // z = z2 + c
+ nz.x=dot(z, mz);
+ nz.yzw=2.0f*z.x*z.yzw;
+ z=nz+c;
+
+ if(dot(z,z)>4.0f)
+ break;
+ }
+
+ return normalize((vec3)(dot(z,dz[0]),dot(z,dz[1]),dot(z,dz[2])));
+}
+#endif
+
+__kernel void compiler_julia_no_break(__global uint *dst, float resx, float resy, int w)
+{
+ vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1));
+ vec2 p=-1.0f+2.0f*gl_FragCoord.xy/(vec2)(resx,resy);
+ vec3 color = (vec3)(0.0f);
+ vec4 cccc = (vec4)( .7f*cos(.5f*time), .7f*sin(.3f*time), .7f*cos(1.0f*time), 0.0f );
+ vec3 edir = normalize((vec3)(p,1.0f));
+ vec3 wori = (vec3)(0.0f,0.0f,-2.0f);
+
+ float ao;
+ float t = jinteresct(wori,edir,cccc,&ao);
+ if(t<100.0f)
+ {
+#if 1
+ vec3 inter = wori + t*edir;
+ vec3 nor = calcNormal(inter,cccc);
+
+ float dif = .5f + .5f*dot( nor, (vec3)(0.57703f) );
+ ao = max( 1.0f-ao*0.005f, 0.0f);
+
+ color = (vec3)(1.0f,.9f,.5f)*dif*ao + .5f*(vec3)(.6f,.7f,.8f)*ao;
+#else
+ color = (vec3)(0.5f,0.0f,0.0f);
+#endif
+ }
+ else
+ {
+ color = (vec3)(0.5f,0.51f,0.52f)+(vec3)(0.5f,0.47f,0.45f)*p.y;
+ }
+
+ vec4 gl_FragColor = (vec4)(color,1.0f);
+ OUTPUT;
+}
+
diff --git a/kernels/compiler_julia_no_break_ref.bmp b/kernels/compiler_julia_no_break_ref.bmp
new file mode 100644
index 0000000..e17f666
Binary files /dev/null and b/kernels/compiler_julia_no_break_ref.bmp differ
diff --git a/kernels/compiler_julia_ref.bmp b/kernels/compiler_julia_ref.bmp
new file mode 100644
index 0000000..2082a1e
Binary files /dev/null and b/kernels/compiler_julia_ref.bmp differ
diff --git a/kernels/compiler_local_memory.cl b/kernels/compiler_local_memory.cl
new file mode 100644
index 0000000..daadd66
--- /dev/null
+++ b/kernels/compiler_local_memory.cl
@@ -0,0 +1,5 @@
+__kernel void compiler_local_memory(__global int *dst, __local int *src) {
+ src[get_local_id(0)] = get_local_id(0);
+ dst[get_global_id(0)] = src[15 - get_local_id(0)];
+}
+
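
The __local int *src parameter above has no host-side buffer behind it; the host only reserves local memory by passing a size and a NULL pointer to clSetKernelArg. A hedged sketch (the work-group size of 16 is an assumption suggested by the 15 - get_local_id(0) indexing):

    #include <CL/cl.h>

    static cl_int launch_local_memory(cl_command_queue queue, cl_kernel kernel,
                                      cl_mem dst, size_t global_size)
    {
      size_t local_size = 16;  /* assumed from the indexing in the kernel */
      clSetKernelArg(kernel, 0, sizeof(cl_mem), &dst);
      clSetKernelArg(kernel, 1, local_size * sizeof(cl_int), NULL); /* __local scratch */
      return clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size,
                                    &local_size, 0, NULL, NULL);
    }
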
diff --git a/kernels/compiler_local_memory_barrier.cl b/kernels/compiler_local_memory_barrier.cl
new file mode 100644
index 0000000..39a94b8
--- /dev/null
+++ b/kernels/compiler_local_memory_barrier.cl
@@ -0,0 +1,6 @@
+__kernel void compiler_local_memory_barrier(__global int *dst, __local int *src) {
+ src[get_local_id(0)] = get_local_id(0);
+ barrier(CLK_LOCAL_MEM_FENCE);
+ dst[get_global_id(0)] = src[15 - get_local_id(0)];
+}
+
diff --git a/kernels/compiler_local_memory_barrier_wg64.cl b/kernels/compiler_local_memory_barrier_wg64.cl
new file mode 100644
index 0000000..b2ea906
--- /dev/null
+++ b/kernels/compiler_local_memory_barrier_wg64.cl
@@ -0,0 +1,6 @@
+__kernel void compiler_local_memory_barrier_wg64(__global int *dst, __local int *src) {
+ src[get_local_id(0)] = get_local_id(0);
+ barrier(CLK_LOCAL_MEM_FENCE);
+ dst[get_global_id(0)] = src[63 - get_local_id(0)];
+}
+
diff --git a/kernels/compiler_local_memory_two_ptr.cl b/kernels/compiler_local_memory_two_ptr.cl
new file mode 100644
index 0000000..f410406
--- /dev/null
+++ b/kernels/compiler_local_memory_two_ptr.cl
@@ -0,0 +1,9 @@
+__kernel void compiler_local_memory_two_ptr(__global int *dst,
+ __local int *src0,
+ __local int *src1)
+{
+ src0[get_local_id(0)] = get_local_id(0);
+ src1[get_local_id(0)] = get_global_id(0);
+ dst[get_global_id(0)] = src0[15 - get_local_id(0)] + src1[15 - get_local_id(0)];
+}
+
diff --git a/kernels/compiler_local_slm.cl b/kernels/compiler_local_slm.cl
new file mode 100644
index 0000000..1a4b175
--- /dev/null
+++ b/kernels/compiler_local_slm.cl
@@ -0,0 +1,10 @@
+#if 0
+__kernel void compiler_local_slm(__global int *dst, __local int *hop) {
+#else
+__kernel void compiler_local_slm(__global int *dst) {
+ __local int hop[10];
+#endif
+ hop[get_global_id(0)] = get_local_id(1);
+ dst[get_global_id(0)] = hop[get_local_id(0)];
+}
+
diff --git a/kernels/compiler_lower_return0.cl b/kernels/compiler_lower_return0.cl
new file mode 100644
index 0000000..fd9846e
--- /dev/null
+++ b/kernels/compiler_lower_return0.cl
@@ -0,0 +1,8 @@
+__kernel void
+compiler_lower_return0(__global int *src, __global int *dst) {
+ const int id = get_global_id(0);
+ dst[id] = id;
+ if (src[id] > 0) return;
+ dst[id] = src[id];
+}
+
diff --git a/kernels/compiler_lower_return1.cl b/kernels/compiler_lower_return1.cl
new file mode 100644
index 0000000..bcb6b7f
--- /dev/null
+++ b/kernels/compiler_lower_return1.cl
@@ -0,0 +1,8 @@
+__kernel void
+compiler_lower_return1(__global int *src, __global int *dst) {
+ const int id = get_global_id(0);
+ dst[id] = id;
+ if (id < 11 && (src[id] > 0 || src[id+16] < 2)) return;
+ dst[id] = src[id];
+}
+
diff --git a/kernels/compiler_lower_return2.cl b/kernels/compiler_lower_return2.cl
new file mode 100644
index 0000000..9fa8ad6
--- /dev/null
+++ b/kernels/compiler_lower_return2.cl
@@ -0,0 +1,11 @@
+__kernel void
+compiler_lower_return2(__global int *src, __global int *dst) {
+ const int id = get_global_id(0);
+ dst[id] = id;
+ while (dst[id] > src[id]) {
+ if (dst[id] > 10) return;
+ dst[id]--;
+ }
+ dst[id] += 2;
+}
+
diff --git a/kernels/compiler_mandelbrot.cl b/kernels/compiler_mandelbrot.cl
new file mode 100644
index 0000000..42295ab
--- /dev/null
+++ b/kernels/compiler_mandelbrot.cl
@@ -0,0 +1,47 @@
+// Used to index into the 1D array so that we can use
+// it effectively as a 2D array
+int ID(int x, int y, int width) { return 4*width*y + x*4; }
+float mapX(float x) { return x*3.25f - 2.f; }
+float mapY(float y) { return y*2.5f - 1.25f; }
+
+__kernel void compiler_mandelbrot(__global char *out) {
+ int x_dim = get_global_id(0);
+ int y_dim = get_global_id(1);
+ int width = get_global_size(0);
+ int height = get_global_size(1);
+ int idx = ID(x_dim, y_dim, width);
+
+ float x_origin = mapX((float) x_dim / (float) width);
+ float y_origin = mapY((float) y_dim / (float) height);
+
+ // The escape time algorithm; it follows the pseudocode from Wikipedia
+ // _very_ closely
+ float x = 0.0f;
+ float y = 0.0f;
+
+ int iteration = 0;
+
+ // This can be changed to be more or less precise
+ int max_iteration = 256;
+ while(x*x + y*y <= 4 && iteration < max_iteration) {
+ float xtemp = x*x - y*y + x_origin;
+ y = 2*x*y + y_origin;
+ x = xtemp;
+ iteration++;
+ }
+
+ if(iteration == max_iteration) {
+ // This coordinate did not escape, so it is in the Mandelbrot set
+ out[idx] = 0;
+ out[idx + 1] = 0;
+ out[idx + 2] = 0;
+ out[idx + 3] = 255;
+ } else {
+ // This coordinate did escape, so color it based on how quickly it escaped
+ out[idx] = iteration;
+ out[idx + 1] = iteration;
+ out[idx + 2] = iteration;
+ out[idx + 3] = 255;
+ }
+
+}
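
The kernel derives width and height from get_global_size() and writes four bytes (RGBA) per pixel, so the output buffer must hold 4*width*height chars and the launch is a 2D NDRange. A hedged sketch; error handling is trimmed:

    #include <CL/cl.h>

    static cl_int run_mandelbrot(cl_context ctx, cl_command_queue queue,
                                 cl_kernel kernel, size_t width, size_t height)
    {
      cl_int err;
      cl_mem out = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY,
                                  4 * width * height, NULL, &err);
      if (err != CL_SUCCESS) return err;

      size_t global[2] = { width, height };
      clSetKernelArg(kernel, 0, sizeof(cl_mem), &out);
      err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, NULL,
                                   0, NULL, NULL);
      clFinish(queue);
      clReleaseMemObject(out);
      return err;
    }
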
diff --git a/kernels/compiler_mandelbrot_alternate.cl b/kernels/compiler_mandelbrot_alternate.cl
new file mode 100644
index 0000000..fc99326
--- /dev/null
+++ b/kernels/compiler_mandelbrot_alternate.cl
@@ -0,0 +1,38 @@
+int offset(int x, int y, int width) { return width*y + x; }
+float mapX(float x) {return x*3.25f - 2.f;}
+float mapY(float y) {return y*2.5f - 1.25f;}
+
+__kernel void compiler_mandelbrot_alternate(__global uint *out,
+ float rcpWidth,
+ float rcpHeight,
+ float criterium)
+{
+ int xDim = get_global_id(0);
+ int yDim = get_global_id(1);
+ int width = get_global_size(0);
+ int height = get_global_size(1);
+ int idx = offset(xDim, yDim, width);
+
+ float xOrigin = mapX((float) xDim * rcpWidth);
+ float yOrigin = mapY((float) yDim * rcpHeight);
+ float x = 0.0f;
+ float y = 0.0f;
+
+ float iteration = 256.f;
+
+ bool breakCond = true;
+ while (breakCond) {
+ const float xtemp = mad(-y,y,mad(x,x,xOrigin));
+ y = mad(2.f*x, y, yOrigin);
+ x = xtemp;
+ iteration -= 1.f;
+ breakCond = -mad(y,y,mad(x,x, -criterium)) * iteration > 0.f;
+ }
+
+ const uint iIteration = 256 - (uint) iteration;
+ const uint isBlack = (iIteration == 256);
+ const uint black = 255 << 24;
+ const uint nonBlack = iIteration | (iIteration << 8) | (iIteration << 16) | (255 << 24);
+ out[idx] = select(nonBlack, black, isBlack);
+}
+
diff --git a/kernels/compiler_mandelbrot_alternate_ref.bmp b/kernels/compiler_mandelbrot_alternate_ref.bmp
new file mode 100644
index 0000000..011d583
Binary files /dev/null and b/kernels/compiler_mandelbrot_alternate_ref.bmp differ
diff --git a/kernels/compiler_mandelbrot_ref.bmp b/kernels/compiler_mandelbrot_ref.bmp
new file mode 100644
index 0000000..494bf8b
Binary files /dev/null and b/kernels/compiler_mandelbrot_ref.bmp differ
diff --git a/kernels/compiler_math.cl b/kernels/compiler_math.cl
new file mode 100644
index 0000000..0659840
--- /dev/null
+++ b/kernels/compiler_math.cl
@@ -0,0 +1,14 @@
+__kernel void compiler_math(__global float *dst, __global float *src) {
+ const float x = src[get_global_id(0)];
+ switch (get_global_id(0)) {
+ case 0: dst[get_global_id(0)] = native_cos(x); break;
+ case 1: dst[get_global_id(0)] = native_sin(x); break;
+ case 2: dst[get_global_id(0)] = native_log2(x); break;
+ case 3: dst[get_global_id(0)] = native_sqrt(x); break;
+ case 4: dst[get_global_id(0)] = native_rsqrt(x); break;
+ case 5: dst[get_global_id(0)] = native_recip(x); break;
+ case 6: dst[get_global_id(0)] = native_tan(x); break;
+ default: dst[get_global_id(0)] = 1.f; break;
+ };
+}
+
diff --git a/kernels/compiler_menger_sponge.cl b/kernels/compiler_menger_sponge.cl
new file mode 100644
index 0000000..b59c5e3
--- /dev/null
+++ b/kernels/compiler_menger_sponge.cl
@@ -0,0 +1,189 @@
+// See http://www.iquilezles.org/articles/menger/menger.htm for the
+// full explanation of how this was done
+
+typedef float2 vec2;
+typedef float3 vec3;
+typedef float4 vec4;
+
+#define sin native_sin
+#define cos native_cos
+#define tan native_tan
+#define normalize fast_normalize
+#define length fast_length
+#define mod fmod
+#define time 1.f
+
+// fmod is not like glsl mod!
+__attribute__((always_inline, overloadable))
+float glsl_mod(float x,float y) { return x-y*floor(x/y); }
+__attribute__((always_inline, overloadable))
+float2 glsl_mod(float2 a,float2 b) { return (float2)(glsl_mod(a.x,b.x), glsl_mod(a.y,b.y)); }
+__attribute__((always_inline, overloadable))
+float3 glsl_mod(float3 a,float3 b) { return (float3)(glsl_mod(a.x,b.x), glsl_mod(a.y,b.y), glsl_mod(a.z,b.z)); }
+
+inline vec3 reflect(vec3 I, vec3 N) {
+ return I - 2.0f * dot(N, I) * N;
+}
+
+inline float clamp(float x, float m, float M) { return max(min(x,M),m); }
+
+inline uint pack_fp4(float4 u4) {
+ uint u;
+ u = (((uint) u4.x)) |
+ (((uint) u4.y) << 8) |
+ (((uint) u4.z) << 16);
+ return u;
+}
+
+#define OUTPUT do {\
+ const vec4 final = 255.f * max(min(gl_FragColor, (vec4)(1.f)), (vec4)(0.f)); \
+ dst[get_global_id(0) + get_global_id(1) * w] = pack_fp4(final); \
+} while (0)
+
+__attribute__((always_inline))
+float maxcomp(vec3 p) { return max(p.x,max(p.y,p.z));}
+
+__attribute__((always_inline))
+float sdBox(vec3 p, vec3 b)
+{
+ vec3 di = fabs(p) - b;
+ float mc = maxcomp(di);
+ return min(mc,length(max(di,0.0f)));
+}
+
+__attribute__((always_inline))
+vec4 map(vec3 p)
+{
+ float d = sdBox(p,(vec3)(1.0f));
+ float4 res = (vec4)(d,1.f,0.f,0.f);
+
+ float s = 1.0f;
+ for( int m=0; m<3; m++ )
+ {
+ vec3 a = glsl_mod(p*s, 2.0f)-1.0f;
+ s *= 3.0f;
+ float rx = fabs(1.0f - 3.0f*fabs(a.x));
+ float ry = fabs(1.0f - 3.0f*fabs(a.y));
+ float rz = fabs(1.0f - 3.0f*fabs(a.z));
+
+ float da = max(rx,ry);
+ float db = max(ry,rz);
+ float dc = max(rz,rx);
+ float c = (min(da,min(db,dc))-1.0f)/s;
+ if (c > d)
+ {
+ d = c;
+ res = (vec4)(d, 0.2f*da*db*dc, (1.0f+(float)(m))/4.0f, 0.0f);
+ }
+ }
+ return (vec4)(res.x,res.y,res.z,0.f);
+}
+
+// GLSL ES doesn't seem to like loops with conditional break/return...
+#if 1
+__attribute__((always_inline))
+vec4 intersect( vec3 ro, vec3 rd )
+{
+ float t = 0.0f;
+ for(int i=0;i<64;i++)
+ {
+ vec4 h = map(ro + rd*t);
+ if( h.x<0.002f )
+ return (vec4)(t,h.yzw);
+ t += h.x;
+ }
+ return (vec4)(-1.0f);
+}
+#else
+__attribute__((always_inline))
+vec4 intersect( vec3 ro, vec3 rd )
+{
+ float t = 0.0f;
+ vec4 res = (vec4)(-1.0f);
+ for(int i=0;i<64;i++)
+ {
+ vec4 h = map(ro + rd*t);
+ if (h.x<0.002f)
+ {
+ if(res.x<0.0f) res = (vec4)(t,h.yzw);
+ }
+ t += h.x;
+ }
+ return res;
+}
+#endif
+
+__attribute__((always_inline))
+vec3 calcNormal(vec3 pos)
+{
+ vec3 epsxyy = (vec3)(.001f,0.0f,0.0f);
+ vec3 epsyxy = (vec3)(0.0f,.001f,0.0f);
+ vec3 epsyyx = (vec3)(0.0f,0.0f,.001f);
+ vec3 nor;
+ nor.x = map(pos+epsxyy).x - map(pos-epsxyy).x;
+ nor.y = map(pos+epsyxy).x - map(pos-epsyxy).x;
+ nor.z = map(pos+epsyyx).x - map(pos-epsyyx).x;
+ return normalize(nor);
+}
+
+__kernel void compiler_menger_sponge(__global uint *dst, float resx, float resy, int w)
+{
+ vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1));
+ vec2 p=-1.0f+2.0f*gl_FragCoord.xy/(vec2)(resx,resy);
+
+ // light
+ vec3 light = normalize((vec3)(1.0f,0.8f,-0.6f));
+
+ float ctime = time;
+ // camera
+ vec3 ro = 1.1f*(vec3)(2.5f*cos(0.5f*ctime),1.5f*cos(ctime*.23f),2.5f*sin(0.5f*ctime));
+ vec3 ww = normalize((vec3)(0.0f) - ro);
+ vec3 uu = normalize(cross( (vec3)(0.0f,1.0f,0.0f), ww ));
+ vec3 vv = normalize(cross(ww,uu));
+ vec3 rd = normalize( p.x*uu + p.y*vv + 1.5f*ww );
+ vec3 col = (vec3)(0.0f);
+ vec4 tmat = intersect(ro,rd);
+
+#if 0
+ if( tmat.x>0.0 )
+ col = (vec3)(
+ 0.6f+0.4f*cos(5.0f+6.2831f*tmat.z),
+ 0.6f+0.4f*cos(5.4f+6.2831f*tmat.z),
+ 0.6f+0.4f*cos(5.7f+6.2831f*tmat.z) );
+
+#else
+ if( tmat.x>0.0f )
+ {
+ vec3 pos = ro + tmat.x*rd;
+ vec3 nor = calcNormal(pos);
+
+ float dif1 = max(0.4f + 0.6f*dot(nor,light),0.0f);
+ float dif2 = max(0.4f + 0.6f*dot(nor,(vec3)(-light.x,light.y,-light.z)),0.0f);
+
+ // shadow
+ float ldis = 4.0f;
+ vec4 shadow = intersect( pos + light*ldis, -light );
+ if( shadow.x>0.0f && shadow.x<(ldis-0.01f) ) dif1=0.0f;
+
+ float ao = tmat.y;
+ col = 1.0f*ao*(vec3) (0.2f,0.2f,0.2f);
+ col += 2.0f*(0.5f+0.5f*ao)*dif1*(vec3)(1.0f,0.97f,0.85f);
+ col += 0.2f*(0.5f+0.5f*ao)*dif2*(vec3)(1.0f,0.97f,0.85f);
+ col += 1.0f*(0.5f+0.5f*ao)*(0.5f+0.5f*nor.y)*(vec3)(0.1f,0.15f,0.2f);
+
+ // gamma lighting
+ col = col*0.5f+0.5f*sqrt(col)*1.2f;
+
+ vec3 matcol = (vec3)(
+ 0.6f+0.4f*cos(5.0f+6.2831f*tmat.z),
+ 0.6f+0.4f*cos(5.4f+6.2831f*tmat.z),
+ 0.6f+0.4f*cos(5.7f+6.2831f*tmat.z) );
+ col *= matcol;
+ col *= 1.5f*exp(-0.5f*tmat.x);
+ }
+#endif
+
+ vec4 gl_FragColor = (vec4)(col,1.0f);
+ OUTPUT;
+}
+
diff --git a/kernels/compiler_menger_sponge_no_shadow.cl b/kernels/compiler_menger_sponge_no_shadow.cl
new file mode 100644
index 0000000..4f1093f
--- /dev/null
+++ b/kernels/compiler_menger_sponge_no_shadow.cl
@@ -0,0 +1,125 @@
+// See http://www.iquilezles.org/articles/menger/menger.htm for the
+// full explanation of how this was done
+
+typedef float2 vec2;
+typedef float3 vec3;
+typedef float4 vec4;
+
+#define sin native_sin
+#define cos native_cos
+#define tan native_tan
+#define normalize fast_normalize
+#define length fast_length
+#define mod fmod
+#define time 1.f
+
+// fmod is not like glsl mod!
+__attribute__((always_inline, overloadable))
+float glsl_mod(float x,float y) { return x-y*floor(x/y); }
+__attribute__((always_inline, overloadable))
+float2 glsl_mod(float2 a,float2 b) { return (float2)(glsl_mod(a.x,b.x), glsl_mod(a.y,b.y)); }
+__attribute__((always_inline, overloadable))
+float3 glsl_mod(float3 a,float3 b) { return (float3)(glsl_mod(a.x,b.x), glsl_mod(a.y,b.y), glsl_mod(a.z,b.z)); }
+
+inline vec3 reflect(vec3 I, vec3 N) {
+ return I - 2.0f * dot(N, I) * N;
+}
+
+inline float clamp(float x, float m, float M) { return max(min(x,M),m); }
+
+inline uint pack_fp4(float4 u4) {
+ uint u;
+ u = (((uint) u4.x)) |
+ (((uint) u4.y) << 8) |
+ (((uint) u4.z) << 16);
+ return u;
+}
+
+#define OUTPUT do {\
+ const vec4 final = 255.f * max(min(gl_FragColor, (vec4)(1.f)), (vec4)(0.f)); \
+ dst[get_global_id(0) + get_global_id(1) * w] = pack_fp4(final); \
+} while (0)
+
+__attribute__((always_inline))
+float maxcomp(vec3 p) { return max(p.x,max(p.y,p.z));}
+
+__attribute__((always_inline))
+float sdBox(vec3 p, vec3 b)
+{
+ vec3 di = fabs(p) - b;
+ float mc = maxcomp(di);
+ return min(mc,length(max(di,0.0f)));
+}
+
+__attribute__((always_inline))
+vec4 map(vec3 p)
+{
+ float d = sdBox(p,(vec3)(1.0f));
+ float4 res = (vec4)(d,1.f,0.f,0.f);
+
+ float s = 1.0f;
+ for( int m=0; m<3; m++ )
+ {
+ vec3 a = glsl_mod(p*s, 2.0f)-1.0f;
+ s *= 3.0f;
+ float rx = fabs(1.0f - 3.0f*fabs(a.x));
+ float ry = fabs(1.0f - 3.0f*fabs(a.y));
+ float rz = fabs(1.0f - 3.0f*fabs(a.z));
+
+ float da = max(rx,ry);
+ float db = max(ry,rz);
+ float dc = max(rz,rx);
+ float c = (min(da,min(db,dc))-1.0f)/s;
+ if (c > d)
+ {
+ d = c;
+ res = (vec4)(d, 0.2f*da*db*dc, (1.0f+(float)(m))/4.0f, 0.0f);
+ }
+ }
+ return (vec4)(res.x,res.y,res.z,0.f);
+}
+
+// GLSL ES doesn't seem to like loops with conditional break/return...
+__attribute__((always_inline))
+vec4 intersect( vec3 ro, vec3 rd )
+{
+ float t = 0.0f;
+ for(int i=0;i<64;i++)
+ {
+ vec4 h = map(ro + rd*t);
+ if( h.x<0.002f )
+ return (vec4)(t,h.yzw);
+ t += h.x;
+ }
+ return (vec4)(-1.0f);
+}
+
+__kernel void compiler_menger_sponge_no_shadow(__global uint *dst, float resx, float resy, int w)
+{
+ vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1));
+ vec2 p=-1.0f+2.0f*gl_FragCoord.xy/(vec2)(resx,resy);
+
+ // light
+ vec3 light = normalize((vec3)(1.0f,0.8f,-0.6f));
+
+ float ctime = time;
+ // camera
+ vec3 ro = 1.1f*(vec3)(2.5f*cos(0.5f*ctime),1.5f*cos(ctime*.23f),2.5f*sin(0.5f*ctime));
+ vec3 ww = normalize((vec3)(0.0f) - ro);
+ vec3 uu = normalize(cross( (vec3)(0.0f,1.0f,0.0f), ww ));
+ vec3 vv = normalize(cross(ww,uu));
+ vec3 rd = normalize( p.x*uu + p.y*vv + 1.5f*ww );
+ vec3 col = (vec3)(0.0f);
+ vec4 tmat = intersect(ro,rd);
+
+ if( tmat.x>0.0f )
+ col = (vec3)(
+ 0.6f+0.4f*cos(5.0f+6.2831f*tmat.z),
+ 0.6f+0.4f*cos(5.4f+6.2831f*tmat.z),
+ 0.6f+0.4f*cos(5.7f+6.2831f*tmat.z) );
+
+ vec4 gl_FragColor = (vec4)(col,1.0f);
+ OUTPUT;
+}
+
+
diff --git a/kernels/compiler_menger_sponge_no_shadow_ref.bmp b/kernels/compiler_menger_sponge_no_shadow_ref.bmp
new file mode 100644
index 0000000..133dd1d
Binary files /dev/null and b/kernels/compiler_menger_sponge_no_shadow_ref.bmp differ
diff --git a/kernels/compiler_menger_sponge_ref.bmp b/kernels/compiler_menger_sponge_ref.bmp
new file mode 100644
index 0000000..911289f
Binary files /dev/null and b/kernels/compiler_menger_sponge_ref.bmp differ
diff --git a/kernels/compiler_nautilus.cl b/kernels/compiler_nautilus.cl
new file mode 100644
index 0000000..b53771c
--- /dev/null
+++ b/kernels/compiler_nautilus.cl
@@ -0,0 +1,68 @@
+typedef float2 vec2;
+typedef float3 vec3;
+typedef float4 vec4;
+
+#define sin native_sin
+#define cos native_cos
+#define tan native_tan
+#define normalize fast_normalize
+#define length fast_length
+#define mod fmod
+#define time 1.f
+
+inline vec3 reflect(vec3 I, vec3 N) {
+ return I - 2.0f * dot(N, I) * N;
+}
+
+inline float clamp(float x, float m, float M) { return max(min(x,M),m); }
+
+inline uint pack_fp4(float4 u4) {
+ uint u;
+ u = (((uint) u4.x)) |
+ (((uint) u4.y) << 8) |
+ (((uint) u4.z) << 16);
+ return u;
+}
+
+#define OUTPUT do {\
+ const vec4 final = 255.f * max(min(gl_FragColor, (vec4)(1.f)), (vec4)(0.f)); \
+ dst[get_global_id(0) + get_global_id(1) * w] = pack_fp4(final); \
+} while (0)
+
+inline float e(vec3 c)
+{
+ c=cos((vec3)(cos(c.x+time/6.0f)*c.x-cos(c.y*3.0f+time/5.0f)*c.y,
+ cos(time/4.0f)*c.z/3.0f*c.x-cos(time/7.0f)*c.y,
+ c.x+c.y+c.z+time));
+ return dot(c*c,(vec3)(1.0f))-1.0f;
+}
+
+__kernel void compiler_nautilus(__global uint *dst, float resx, float resy, int w)
+{
+ vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1));
+ vec2 c=-1.0f+2.0f*gl_FragCoord.xy/(vec2)(resx,resy);
+ vec3 o=(vec3)(c.x,c.y,0.0f),g=(vec3)(c.x,c.y,1.0f)/64.0f,v=(vec3)(0.5f);
+ float m = 0.4f;
+
+ for(int r=0;r<100;r++)
+ {
+ float h=e(o)-m;
+ if(h<0.0f)break;
+ o+=h*10.0f*g;
+ v+=h*0.02f;
+ }
+ // light (who needs a normal?)
+ v+=e(o+0.1f)*(vec3)(0.4f,0.7f,1.0f);
+
+ // ambient occlusion
+ float a=0.0f;
+ for(int q=0;q<100;q++)
+ {
+ float l = e(o+0.5f*(vec3)(cos(1.1f*(float)(q)),cos(1.6f*(float)(q)),cos(1.4f*(float)(q))))-m;
+ a+=clamp(4.0f*l,0.0f,1.0f);
+ }
+ v*=a/100.0f;
+ vec4 gl_FragColor=(vec4)(v,1.0f);
+ OUTPUT;
+}
+
diff --git a/kernels/compiler_nautilus_ref.bmp b/kernels/compiler_nautilus_ref.bmp
new file mode 100644
index 0000000..9d2dd96
Binary files /dev/null and b/kernels/compiler_nautilus_ref.bmp differ
diff --git a/kernels/compiler_obread.cl b/kernels/compiler_obread.cl
new file mode 100644
index 0000000..14658d9
--- /dev/null
+++ b/kernels/compiler_obread.cl
@@ -0,0 +1,8 @@
+__kernel void
+compiler_obread(__global uint *src, __global uint *dst)
+{
+ int id = (int)get_global_id(0);
+ const int to = __gen_ocl_obread(src+id);
+ dst[id] = to;
+}
+
diff --git a/kernels/compiler_obwrite.cl b/kernels/compiler_obwrite.cl
new file mode 100644
index 0000000..50e55a1
--- /dev/null
+++ b/kernels/compiler_obwrite.cl
@@ -0,0 +1,8 @@
+__kernel void
+compiler_obwrite(__global uint *src, __global uint *dst)
+{
+ int id = (int)get_global_id(0);
+ const int to = src[id];
+ __gen_ocl_obwrite(dst+id,to);
+}
+
diff --git a/kernels/compiler_region.cl b/kernels/compiler_region.cl
new file mode 100644
index 0000000..d74ac7d
--- /dev/null
+++ b/kernels/compiler_region.cl
@@ -0,0 +1,10 @@
+__kernel void
+compiler_region(__global uint *src, __global uint *dst)
+{
+ __gen_ocl_force_simd16();
+ int id = (int)get_global_id(0);
+ const int x0 = src[id];
+ const int x1 = src[id+16];
+ dst[id] = __gen_ocl_region(0, 16, 8, 2, x0, x1);
+}
+
diff --git a/kernels/compiler_region0.cl b/kernels/compiler_region0.cl
new file mode 100644
index 0000000..5bd57c0
--- /dev/null
+++ b/kernels/compiler_region0.cl
@@ -0,0 +1,11 @@
+__kernel void
+compiler_region0(__global uint *src, __global uint *dst)
+{
+ __gen_ocl_force_simd16();
+ int id = (int)get_global_id(0);
+ const int x0 = src[id];
+ const int x1 = src[id+16];
+ const int x2 = src[id+32];
+ dst[id] = __gen_ocl_region(1, 16, 8, 2, x0, x1, x2);
+}
+
diff --git a/kernels/compiler_region1.cl b/kernels/compiler_region1.cl
new file mode 100644
index 0000000..9deb63c
--- /dev/null
+++ b/kernels/compiler_region1.cl
@@ -0,0 +1,9 @@
+__kernel void
+compiler_region1(__global uint *src, __global uint *dst)
+{
+ __gen_ocl_force_simd16();
+ int id = (int)get_global_id(0);
+ const int x0 = src[id];
+ dst[id] = __gen_ocl_region(0, 16, 8, 2, x0);
+}
+
diff --git a/kernels/compiler_ribbon.cl b/kernels/compiler_ribbon.cl
new file mode 100644
index 0000000..92375e7
--- /dev/null
+++ b/kernels/compiler_ribbon.cl
@@ -0,0 +1,89 @@
+typedef float2 vec2;
+typedef float3 vec3;
+typedef float4 vec4;
+#define sin native_sin
+#define cos native_cos
+#define tan native_tan
+#define normalize fast_normalize
+#define length fast_length
+
+inline vec3 reflect(vec3 I, vec3 N) {
+ return I - 2.0f * dot(N, I) * N;
+}
+
+#define time 1.f
+
+// Object A (tunnel)
+inline float oa(vec3 q) {
+ return cos(q.x)+cos(q.y*1.5f)+cos(q.z)+cos(q.y*20.f)*.05f;
+}
+
+// Object B (ribbon)
+inline float ob(vec3 q) {
+ return length(max(fabs(q-(vec3)(cos(q.z*1.5f)*.3f,-.5f+cos(q.z)*.2f,.0f))-(vec3)(.125f,.02f,time+3.f),(vec3)(.0f)));
+}
+
+// Scene
+inline float o(vec3 q) { return min(oa(q),ob(q)); }
+
+// Get normal. XXX Not inlined by LLVM
+__attribute__((always_inline)) vec3 gn(vec3 q) {
+ const vec3 fxyy = (vec3)(.01f, 0.f, 0.f);
+ const vec3 fyxy = (vec3)(0.f, .01f, 0.f);
+ const vec3 fyyx = (vec3)(0.f, 0.f, .01f);
+ return normalize((vec3)(o(q+fxyy),
+ o(q+fyxy),
+ o(q+fyyx)));
+}
+
+inline uint pack_fp4(float4 u4) {
+ uint u;
+ u = (((uint) u4.x)) |
+ (((uint) u4.y) << 8) |
+ (((uint) u4.z) << 16);
+ return u;
+}
+
+// XXX vector types not supported in function arguments yet
+__kernel void compiler_ribbon(__global uint *dst, float resx, float resy, int w)
+{
+ vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1));
+ vec2 p = -1.0f + 2.0f * gl_FragCoord.xy / (vec2)(resx, resy);
+ p.x *= resx/resy;
+
+ vec4 c = (vec4)(1.0f);
+ const vec3 org = (vec3)(sin(time)*.5f,
+ cos(time*.5f)*.25f+.25f,
+ time);
+ vec3 dir=normalize((vec3)(p.x*1.6f,p.y,1.0f));
+ vec3 q = org, pp;
+ float d=.0f;
+
+ // First raymarching
+ for(int i=0;i<64;i++) {
+ d=o(q);
+ q+=d*dir;
+ }
+ pp=q;
+ const float f = length(q-org)*0.02f;
+
+ // Second raymarching (reflection)
+ dir=reflect(dir,gn(q));
+ q+=dir;
+ for(int i=0;i<64;i++) {
+ d=o(q);
+ q+=d*dir;
+ }
+ c = max(dot(gn(q), (vec3)(0.1f,0.1f,0.0f)), 0.0f)
+ + (vec4)(0.3f, cos(time*.5f)*.5f+.5f, sin(time*.5f)*.5f+.5f, 1.f) * min(length(q-org)*.04f,1.f);
+
+ // Ribbon Color
+ if(oa(pp)>ob(pp))
+ c = mix(c, (vec4)(cos(time*.3f)*0.5f + 0.5f,cos(time*.2f)*.5f+.5f,sin(time*.3f)*.5f+.5f,1.f),.3f);
+
+ // Final Color
+ const vec4 color = ((c+(vec4)(f))+(1.f-min(pp.y+1.9f,1.f))*(vec4)(1.f,.8f,.7f,1.f))*min(time*.5f,1.f);
+ const vec4 final = 255.f * max(min(color, (vec4)(1.f)), (vec4)(0.f));
+ dst[get_global_id(0) + get_global_id(1) * w] = pack_fp4(final);
+}
+
diff --git a/kernels/compiler_ribbon_ref.bmp b/kernels/compiler_ribbon_ref.bmp
new file mode 100644
index 0000000..2225f45
Binary files /dev/null and b/kernels/compiler_ribbon_ref.bmp differ
diff --git a/kernels/compiler_short_scatter.cl b/kernels/compiler_short_scatter.cl
new file mode 100644
index 0000000..7dad029
--- /dev/null
+++ b/kernels/compiler_short_scatter.cl
@@ -0,0 +1,7 @@
+__kernel void
+compiler_short_scatter(__global short *dst)
+{
+ int id = (int) get_global_id(0);
+ dst[id] = (short) id;
+}
+
diff --git a/kernels/compiler_sub_bytes.cl b/kernels/compiler_sub_bytes.cl
new file mode 100644
index 0000000..f058561
--- /dev/null
+++ b/kernels/compiler_sub_bytes.cl
@@ -0,0 +1,7 @@
+__kernel void
+compiler_sub_bytes(__global char *src0, __global char *src1, __global char *dst)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = src0[id] - src1[id];
+}
+
diff --git a/kernels/compiler_sub_shorts.cl b/kernels/compiler_sub_shorts.cl
new file mode 100644
index 0000000..d26de7f
--- /dev/null
+++ b/kernels/compiler_sub_shorts.cl
@@ -0,0 +1,7 @@
+__kernel void
+compiler_sub_shorts(__global short *src0, __global short *src1, __global short *dst)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = src0[id] - src1[id];
+}
+
diff --git a/kernels/compiler_switch.cl b/kernels/compiler_switch.cl
new file mode 100644
index 0000000..c28b431
--- /dev/null
+++ b/kernels/compiler_switch.cl
@@ -0,0 +1,14 @@
+__kernel void compiler_switch(__global int *dst, __global int *src)
+{
+ switch (get_global_id(0)) {
+ case 0: dst[get_global_id(0)] = src[get_global_id(0) + 4]; break;
+ case 1: dst[get_global_id(0)] = src[get_global_id(0) + 14]; break;
+ case 2: dst[get_global_id(0)] = src[get_global_id(0) + 13]; break;
+ case 6: dst[get_global_id(0)] = src[get_global_id(0) + 11]; break;
+ case 7: dst[get_global_id(0)] = src[get_global_id(0) + 10]; break;
+ case 10: dst[get_global_id(0)] = src[get_global_id(0) + 9]; break;
+ case 12: dst[get_global_id(0)] = src[get_global_id(0) + 6]; break;
+ default: dst[get_global_id(0)] = src[get_global_id(0) + 8]; break;
+ }
+}
+
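
A plain-C reference of the mapping above, convenient when checking the result buffer on the host (illustration only; it simply mirrors the switch cases):

    /* For work-item i the kernel computes dst[i] = src[i + switch_offset(i)]. */
    static int switch_offset(int i)
    {
      switch (i) {
      case 0:  return 4;
      case 1:  return 14;
      case 2:  return 13;
      case 6:  return 11;
      case 7:  return 10;
      case 10: return 9;
      case 12: return 6;
      default: return 8;
      }
    }
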
diff --git a/kernels/compiler_uint16_copy.cl b/kernels/compiler_uint16_copy.cl
new file mode 100644
index 0000000..1072234
--- /dev/null
+++ b/kernels/compiler_uint16_copy.cl
@@ -0,0 +1,8 @@
+__kernel void
+compiler_uint16_copy(__global uint16 *src, __global uint16 *dst)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = src[id];
+}
+
+
diff --git a/kernels/compiler_uint2_copy.cl b/kernels/compiler_uint2_copy.cl
new file mode 100644
index 0000000..7c5c5e3
--- /dev/null
+++ b/kernels/compiler_uint2_copy.cl
@@ -0,0 +1,7 @@
+__kernel void
+compiler_uint2_copy(__global uint2 *src, __global uint2 *dst)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = src[id];
+}
+
diff --git a/kernels/compiler_uint3_copy.cl b/kernels/compiler_uint3_copy.cl
new file mode 100644
index 0000000..7dc71b2
--- /dev/null
+++ b/kernels/compiler_uint3_copy.cl
@@ -0,0 +1,7 @@
+__kernel void
+compiler_uint3_copy(__global uint3 *src, __global uint3 *dst)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = src[id];
+}
+
diff --git a/kernels/compiler_uint3_unaligned_copy.cl b/kernels/compiler_uint3_unaligned_copy.cl
new file mode 100644
index 0000000..a50f0ab
--- /dev/null
+++ b/kernels/compiler_uint3_unaligned_copy.cl
@@ -0,0 +1,8 @@
+__kernel void
+compiler_uint3_unaligned_copy(__global uint *src, __global uint *dst)
+{
+ const int id = (int)get_global_id(0);
+ const uint3 from = vload3(id, src);
+ vstore3(from, id, dst);
+}
+
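
Unlike the uint3 copies above, which index __global uint3 pointers (and therefore step in padded 16-byte elements), vload3/vstore3 address three consecutive uints starting at 3*id. A hedged sketch of the matching host allocation:

    #include <CL/cl.h>

    /* n work-items touch uints [0, 3*n), so the buffers are 3*n packed uints. */
    static cl_mem alloc_packed_uint3(cl_context ctx, size_t n, cl_int *err)
    {
      return clCreateBuffer(ctx, CL_MEM_READ_WRITE, 3 * n * sizeof(cl_uint),
                            NULL, err);
    }
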
diff --git a/kernels/compiler_uint8_copy.cl b/kernels/compiler_uint8_copy.cl
new file mode 100644
index 0000000..9eee538
--- /dev/null
+++ b/kernels/compiler_uint8_copy.cl
@@ -0,0 +1,7 @@
+__kernel void
+compiler_uint8_copy(__global uint8 *src, __global uint8 *dst)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = src[id];
+}
+
diff --git a/kernels/compiler_unstructured_branch0.cl b/kernels/compiler_unstructured_branch0.cl
new file mode 100644
index 0000000..66da6e0
--- /dev/null
+++ b/kernels/compiler_unstructured_branch0.cl
@@ -0,0 +1,14 @@
+__kernel void
+compiler_unstructured_branch0(__global int *src, __global int *dst)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = src[id];
+ if (dst[id] >= 0) goto label;
+
+ do {
+ dst[id] = 1;
+ label:
+ id += get_local_size(0);
+ } while (id < 32);
+}
+
diff --git a/kernels/compiler_unstructured_branch1.cl b/kernels/compiler_unstructured_branch1.cl
new file mode 100644
index 0000000..fb937e0
--- /dev/null
+++ b/kernels/compiler_unstructured_branch1.cl
@@ -0,0 +1,14 @@
+__kernel void
+compiler_unstructured_branch1(__global int *src, __global int *dst)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = src[id];
+ if (dst[id] >= 0) goto label1;
+ dst[id] = 1;
+ if (src[id] <= 2) goto label2;
+ label1:
+ dst[id] -= 2;
+ label2:
+ dst[id] += 2;
+}
+
diff --git a/kernels/compiler_unstructured_branch2.cl b/kernels/compiler_unstructured_branch2.cl
new file mode 100644
index 0000000..546f253
--- /dev/null
+++ b/kernels/compiler_unstructured_branch2.cl
@@ -0,0 +1,18 @@
+__kernel void
+compiler_unstructured_branch2(__global int *src, __global int *dst)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = src[id];
+ if (dst[id] < 0) goto label1;
+ dst[id] = 1;
+ if (dst[id] > src[id]) goto label3;
+ dst[id]++;
+ if (src[id] <= 2) goto label2;
+ label1:
+ dst[id] -= 2;
+ label2:
+ dst[id] += 2;
+ label3:
+ dst[id] *= 3;
+}
+
diff --git a/kernels/compiler_unstructured_branch3.cl b/kernels/compiler_unstructured_branch3.cl
new file mode 100644
index 0000000..67b4761
--- /dev/null
+++ b/kernels/compiler_unstructured_branch3.cl
@@ -0,0 +1,16 @@
+__kernel void
+compiler_unstructured_branch3(__global int *src, __global int *dst)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = src[id];
+ if (dst[id] >= 2) goto label1;
+ dst[id] = 1;
+ if (src[id] < 2) goto label2;
+ dst[id]--;
+ label1:
+ dst[id] -= 2;
+ label2:
+ dst[id] += 2;
+}
+
+
diff --git a/kernels/compiler_vote_all.cl b/kernels/compiler_vote_all.cl
new file mode 100644
index 0000000..1918c1c
--- /dev/null
+++ b/kernels/compiler_vote_all.cl
@@ -0,0 +1,10 @@
+__kernel void
+compiler_vote_all(__global uint *src, __global uint *dst)
+{
+ int id = (int)get_global_id(0);
+ if (__gen_ocl_all(id > 8))
+ dst[id] = src[id];
+ else
+ dst[id] = 0;
+}
+
diff --git a/kernels/compiler_vote_any.cl b/kernels/compiler_vote_any.cl
new file mode 100644
index 0000000..0a81e89
--- /dev/null
+++ b/kernels/compiler_vote_any.cl
@@ -0,0 +1,10 @@
+__kernel void
+compiler_vote_any(__global uint *src, __global uint *dst)
+{
+ int id = (int)get_global_id(0);
+ if (__gen_ocl_any(id > 6))
+ dst[id] = src[id];
+ else
+ dst[id] = 0;
+}
+
diff --git a/kernels/compiler_write_only_bytes.cl b/kernels/compiler_write_only_bytes.cl
new file mode 100644
index 0000000..0bc0cd8
--- /dev/null
+++ b/kernels/compiler_write_only_bytes.cl
@@ -0,0 +1,7 @@
+__kernel void
+compiler_write_only_bytes(__global char *dst)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = 2;
+}
+
diff --git a/kernels/compiler_write_only_shorts.cl b/kernels/compiler_write_only_shorts.cl
new file mode 100644
index 0000000..bfd23cc
--- /dev/null
+++ b/kernels/compiler_write_only_shorts.cl
@@ -0,0 +1,7 @@
+__kernel void
+compiler_write_only_shorts(__global short *dst)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = 2;
+}
+
diff --git a/kernels/lenna128x128.bmp b/kernels/lenna128x128.bmp
new file mode 100644
index 0000000..c3d9c46
Binary files /dev/null and b/kernels/lenna128x128.bmp differ
diff --git a/kernels/test_copy_buffer.cl b/kernels/test_copy_buffer.cl
new file mode 100644
index 0000000..2aec892
--- /dev/null
+++ b/kernels/test_copy_buffer.cl
@@ -0,0 +1,7 @@
+__kernel void
+test_copy_buffer(__global float* src, __global float* dst)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = src[id];
+}
+
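
For completeness, a hedged end-to-end host sketch that builds and runs this kernel through the standard API (not part of the imported tree; error checking is trimmed and the kernel source is embedded instead of being loaded from kernels/test_copy_buffer.cl):

    #include <CL/opencl.h>
    #include <stdio.h>

    static const char *source =
      "__kernel void test_copy_buffer(__global float *src, __global float *dst) {\n"
      "  int id = (int)get_global_id(0);\n"
      "  dst[id] = src[id];\n"
      "}\n";

    int main(void)
    {
      enum { N = 1024 };
      float in[N], out[N];
      int i, ok = 1;
      cl_platform_id platform;
      cl_device_id device;
      cl_int err;

      for (i = 0; i < N; ++i) in[i] = (float)i;

      clGetPlatformIDs(1, &platform, NULL);
      clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
      cl_context ctx = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
      cl_command_queue queue = clCreateCommandQueue(ctx, device, 0, &err);

      cl_mem src = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                  sizeof(in), in, &err);
      cl_mem dst = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, sizeof(out), NULL, &err);

      cl_program program = clCreateProgramWithSource(ctx, 1, &source, NULL, &err);
      clBuildProgram(program, 1, &device, NULL, NULL, NULL);
      cl_kernel kernel = clCreateKernel(program, "test_copy_buffer", &err);

      clSetKernelArg(kernel, 0, sizeof(cl_mem), &src);
      clSetKernelArg(kernel, 1, sizeof(cl_mem), &dst);
      size_t global = N;
      clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL);
      clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, sizeof(out), out, 0, NULL, NULL);

      for (i = 0; i < N; ++i) ok &= (out[i] == in[i]);
      printf("test_copy_buffer %s\n", ok ? "OK" : "FAILED");

      clReleaseKernel(kernel);
      clReleaseProgram(program);
      clReleaseMemObject(src);
      clReleaseMemObject(dst);
      clReleaseCommandQueue(queue);
      clReleaseContext(ctx);
      return ok ? 0 : 1;
    }
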
diff --git a/kernels/test_copy_buffer_row.cl b/kernels/test_copy_buffer_row.cl
new file mode 100644
index 0000000..a55d99e
--- /dev/null
+++ b/kernels/test_copy_buffer_row.cl
@@ -0,0 +1,9 @@
+__kernel void
+test_copy_buffer_row(__global int *src, __global int *dst, __global int *data)
+{
+ int row = data[0];
+ int size = data[1];
+ int id = (int) get_global_id(0);
+ for (; id < size; id += row) dst[id] = src[id];
+}
+
diff --git a/kernels/test_write_only.cl b/kernels/test_write_only.cl
new file mode 100644
index 0000000..bb7e972
--- /dev/null
+++ b/kernels/test_write_only.cl
@@ -0,0 +1,7 @@
+__kernel void
+test_write_only(__global int *dst)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = id;
+}
+
diff --git a/setup_fulsim_hsw.sh b/setup_fulsim_hsw.sh
new file mode 100644
index 0000000..140be66
--- /dev/null
+++ b/setup_fulsim_hsw.sh
@@ -0,0 +1,5 @@
+export INTEL_DEVID_OVERRIDE=0x0094
+export DEVICE=hsw_m0
+export OCL_FULSIM_RUN=1
+export OCL_FULSIM_DEBUG_MODE=$1
+
diff --git a/setup_fulsim_ivb.sh b/setup_fulsim_ivb.sh
new file mode 100644
index 0000000..9df9082
--- /dev/null
+++ b/setup_fulsim_ivb.sh
@@ -0,0 +1,5 @@
+export INTEL_DEVID_OVERRIDE=0x0166 # or, 0x0112
+export DEVICE=ivb_m_gt2 # snb_gt2 for SNB GT2 desktop
+export OCL_SIMULATOR=1 # 0 -> HW, 1 -> fulsim, 2 -> perfsim
+export OCL_FULSIM_DEBUG_MODE=$1
+
diff --git a/setup_perfsim_ivb.sh b/setup_perfsim_ivb.sh
new file mode 100644
index 0000000..4cfdd1a
--- /dev/null
+++ b/setup_perfsim_ivb.sh
@@ -0,0 +1,4 @@
+export INTEL_DEVID_OVERRIDE=0x0166 # or, 0x0112
+export DEVICE=ivb_m_gt2 # snb_gt2 for SNB GT2 desktop
+export OCL_SIMULATOR=2 # 0 -> HW, 1 -> fulsim, 2 -> perfsim
+
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..78062fb
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,40 @@
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}
+ ${DRM_INCLUDE_PATH}
+ ${CMAKE_CURRENT_SOURCE_DIR}/../backend/src/backend/
+ ${CMAKE_CURRENT_SOURCE_DIR}/../include)
+
+set(OPENCL_SRC
+ cl_api.c
+ cl_alloc.c
+ cl_kernel.c
+ cl_program.c
+ cl_sampler.c
+ cl_event.c
+ cl_image.c
+ cl_mem.c
+ cl_platform_id.c
+ cl_device_id.c
+ cl_context.c
+ cl_command_queue.c
+ cl_command_queue.h
+ cl_command_queue_gen7.c
+ cl_driver.h
+ cl_driver.cpp
+ cl_driver_defs.c
+ intel/intel_gpgpu.c
+ intel/intel_batchbuffer.c
+ intel/intel_driver.c
+ x11/dricommon.c
+ x11/va_dri2.c)
+
+link_directories (${LLVM_LIBRARY_DIRS})
+add_library(cl SHARED ${OPENCL_SRC})
+target_link_libraries(cl
+ gbe
+ ${XLIB_LIBRARY}
+ ${XEXT_LIBRARY}
+ ${XFIXES_LIBRARY}
+ ${DRM_INTEL_LIBRARY}
+ ${DRM_LIBRARY})
+install (TARGETS cl LIBRARY DESTINATION lib)
+
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..878bcc0
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,4 @@
+TOP=..
+SUBDIRS=. intel x11
+
+include $(TOP)/Makefile.shared
diff --git a/src/cl_alloc.c b/src/cl_alloc.c
new file mode 100644
index 0000000..20d5578
--- /dev/null
+++ b/src/cl_alloc.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "cl_alloc.h"
+#include "cl_utils.h"
+
+#include <stdlib.h>
+#include <assert.h>
+#include <malloc.h>
+
+static volatile int32_t cl_alloc_n = 0;
+
+LOCAL void*
+cl_malloc(size_t sz)
+{
+ void * p = NULL;
+ atomic_inc(&cl_alloc_n);
+ p = malloc(sz);
+ assert(p);
+ return p;
+}
+
+LOCAL void*
+cl_aligned_malloc(size_t sz, size_t align)
+{
+ void * p = NULL;
+ atomic_inc(&cl_alloc_n);
+ p = memalign(align, sz);
+ assert(p);
+ return p;
+}
+
+LOCAL void*
+cl_calloc(size_t n, size_t elem_size)
+{
+ void *p = NULL;
+ atomic_inc(&cl_alloc_n);
+ p = calloc(n, elem_size);
+ assert(p);
+ return p;
+}
+
+LOCAL void*
+cl_realloc(void *ptr, size_t sz)
+{
+ if (ptr == NULL)
+ atomic_inc(&cl_alloc_n);
+ return realloc(ptr, sz);
+}
+
+LOCAL void
+cl_free(void *ptr)
+{
+ if (ptr == NULL)
+ return;
+ atomic_dec(&cl_alloc_n);
+ free(ptr);
+}
+
+LOCAL size_t
+cl_report_unfreed(void)
+{
+ return cl_alloc_n;
+}
+
+LOCAL void
+cl_report_set_all_freed(void)
+{
+ cl_alloc_n = 0;
+}
+
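The allocator above is a thin wrapper whose main job is leak accounting: every cl_*alloc bumps cl_alloc_n, every cl_free decrements it, and clIntelReportUnfreed() (see cl_api.c below) exposes the counter. A short usage sketch of that contract, assuming no other allocations are outstanding:

    #include <assert.h>
    #include "cl_alloc.h"

    /* Sketch: after a balanced alloc/free sequence the counter is back to 0,
     * which callers can verify through cl_report_unfreed() (assuming nothing
     * else allocated through cl_*alloc is still live). */
    static void alloc_accounting_example(void)
    {
      void *a = cl_malloc(64);
      void *b = cl_calloc(16, sizeof(int));
      cl_free(a);
      cl_free(b);
      assert(cl_report_unfreed() == 0);  /* no leaked cl_*alloc block */
    }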
diff --git a/src/cl_alloc.h b/src/cl_alloc.h
new file mode 100644
index 0000000..9b463ed
--- /dev/null
+++ b/src/cl_alloc.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __CL_ALLOC_H__
+#define __CL_ALLOC_H__
+
+#include "cl_internals.h"
+#include <stdlib.h>
+
+/* Return a valid pointer for the requested memory block size */
+extern void *cl_malloc(size_t sz);
+
+/* Aligned malloc */
+extern void* cl_aligned_malloc(size_t sz, size_t align);
+
+/* malloc + memzero */
+extern void *cl_calloc(size_t n, size_t elem_size);
+
+/* Regular realloc */
+extern void *cl_realloc(void *ptr, size_t sz);
+
+/* Free a pointer allocated with cl_*alloc */
+extern void cl_free(void *ptr);
+
+/* We count the number of allocations. This function reports the number of
+ * allocations still unfreed
+ */
+extern size_t cl_report_unfreed(void);
+
+#endif /* __CL_ALLOC_H__ */
+
diff --git a/src/cl_api.c b/src/cl_api.c
new file mode 100644
index 0000000..23e05b7
--- /dev/null
+++ b/src/cl_api.c
@@ -0,0 +1,1184 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "cl_platform_id.h"
+#include "cl_device_id.h"
+#include "cl_context.h"
+#include "cl_command_queue.h"
+#include "cl_program.h"
+#include "cl_kernel.h"
+#include "cl_mem.h"
+#include "cl_image.h"
+#include "cl_sampler.h"
+#include "cl_alloc.h"
+#include "cl_utils.h"
+
+#include "CL/cl.h"
+#include "CL/cl_intel.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+cl_int
+clGetPlatformIDs(cl_uint num_entries,
+ cl_platform_id * platforms,
+ cl_uint * num_platforms)
+{
+ return cl_get_platform_ids(num_entries, platforms, num_platforms);
+}
+
+cl_int
+clGetPlatformInfo(cl_platform_id platform,
+ cl_platform_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret)
+{
+ return cl_get_platform_into(platform,
+ param_name,
+ param_value_size,
+ param_value,
+ param_value_size_ret);
+}
+
+cl_int
+clGetDeviceIDs(cl_platform_id platform,
+ cl_device_type device_type,
+ cl_uint num_entries,
+ cl_device_id * devices,
+ cl_uint * num_devices)
+{
+ return cl_get_device_ids(platform,
+ device_type,
+ num_entries,
+ devices,
+ num_devices);
+}
+
+cl_int
+clGetDeviceInfo(cl_device_id device,
+ cl_device_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret)
+{
+ return cl_get_device_info(device,
+ param_name,
+ param_value_size,
+ param_value,
+ param_value_size_ret);
+}
+
+cl_context
+clCreateContext(const cl_context_properties * properties,
+ cl_uint num_devices,
+ const cl_device_id * devices,
+ void (* pfn_notify) (const char*, const void*, size_t, void*),
+ void * user_data,
+ cl_int * errcode_ret)
+{
+ return cl_create_context(properties,
+ num_devices,
+ devices,
+ pfn_notify,
+ user_data,
+ errcode_ret);
+}
+
+cl_context
+clCreateContextFromType(const cl_context_properties * properties,
+ cl_device_type device_type,
+ void (CL_CALLBACK *pfn_notify) (const char *, const void *, size_t, void *),
+ void * user_data,
+ cl_int * errcode_ret)
+{
+ NOT_IMPLEMENTED;
+ return NULL;
+}
+
+cl_int
+clRetainContext(cl_context context)
+{
+ cl_int err = CL_SUCCESS;
+ CHECK_CONTEXT (context);
+ cl_context_add_ref(context);
+error:
+ return err;
+}
+
+cl_int
+clReleaseContext(cl_context context)
+{
+ cl_int err = CL_SUCCESS;
+ CHECK_CONTEXT (context);
+ cl_context_delete(context);
+error:
+ return err;
+}
+
+cl_int
+clGetContextInfo(cl_context context,
+ cl_context_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_command_queue
+clCreateCommandQueue(cl_context context,
+ cl_device_id device,
+ cl_command_queue_properties properties,
+ cl_int * errcode_ret)
+{
+ cl_command_queue queue = NULL;
+ cl_int err = CL_SUCCESS;
+ CHECK_CONTEXT (context);
+ queue = cl_context_create_queue(context, device, properties, errcode_ret);
+error:
+ return err == CL_SUCCESS ? queue : NULL;
+}
+
+cl_int
+clRetainCommandQueue(cl_command_queue command_queue)
+{
+ cl_int err = CL_SUCCESS;
+ CHECK_QUEUE (command_queue);
+ cl_command_queue_add_ref(command_queue);
+error:
+ return err;
+}
+
+cl_int
+clReleaseCommandQueue(cl_command_queue command_queue)
+{
+ cl_int err = CL_SUCCESS;
+ CHECK_QUEUE (command_queue);
+ cl_command_queue_delete(command_queue);
+error:
+ return err;
+}
+
+cl_int
+clGetCommandQueueInfo(cl_command_queue command_queue,
+ cl_command_queue_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret)
+{
+ cl_int err = CL_SUCCESS;
+ CHECK_QUEUE (command_queue);
+ NOT_IMPLEMENTED;
+error:
+ return err;
+}
+
+cl_int
+clSetCommandQueueProperty(cl_command_queue command_queue,
+ cl_command_queue_properties properties,
+ cl_bool enable,
+ cl_command_queue_properties * old_properties)
+{
+ cl_int err = CL_SUCCESS;
+ CHECK_QUEUE (command_queue);
+ NOT_IMPLEMENTED;
+error:
+ return err;
+}
+
+cl_mem
+clCreateBuffer(cl_context context,
+ cl_mem_flags flags,
+ size_t size,
+ void * host_ptr,
+ cl_int * errcode_ret)
+{
+ cl_mem mem = NULL;
+ cl_int err = CL_SUCCESS;
+ CHECK_CONTEXT (context);
+
+ mem = cl_mem_new(context, flags, size, host_ptr, &err);
+error:
+ if (errcode_ret)
+ *errcode_ret = err;
+ return mem;
+}
+
+cl_mem
+clCreateSubBuffer(cl_mem buffer,
+ cl_mem_flags flags,
+ cl_buffer_create_type buffer_create_type,
+ const void * buffer_create_info,
+ cl_int * errcode_ret)
+{
+#if 0
+ cl_int err = CL_SUCCESS;
+ CHECK_MEM (buffer);
+ NOT_IMPLEMENTED;
+error:
+#endif
+ return NULL;
+}
+
+cl_mem
+clCreateImage2D(cl_context context,
+ cl_mem_flags flags,
+ const cl_image_format * image_format,
+ size_t image_width,
+ size_t image_height,
+ size_t image_row_pitch,
+ void * host_ptr,
+ cl_int * errcode_ret)
+{
+ cl_mem mem = NULL;
+ cl_int err = CL_SUCCESS;
+ CHECK_CONTEXT (context);
+
+ mem = cl_mem_new_image2D(context,
+ flags,
+ image_format,
+ image_width,
+ image_height,
+ image_row_pitch,
+ host_ptr,
+ errcode_ret);
+error:
+ if (errcode_ret)
+ *errcode_ret = err;
+ return mem;
+}
+
+cl_mem
+clCreateImage3D(cl_context context,
+ cl_mem_flags flags,
+ const cl_image_format * image_format,
+ size_t image_width,
+ size_t image_height,
+ size_t image_depth,
+ size_t image_row_pitch,
+ size_t image_slice_pitch,
+ void * host_ptr,
+ cl_int * errcode_ret)
+{
+ NOT_IMPLEMENTED;
+ return NULL;
+}
+
+cl_int
+clRetainMemObject(cl_mem memobj)
+{
+ cl_int err = CL_SUCCESS;
+ CHECK_MEM (memobj);
+ cl_mem_add_ref(memobj);
+error:
+ return err;
+}
+
+cl_int
+clReleaseMemObject(cl_mem memobj)
+{
+ cl_int err = CL_SUCCESS;
+ CHECK_MEM (memobj);
+ cl_mem_delete(memobj);
+error:
+ return err;
+}
+
+cl_int
+clGetSupportedImageFormats(cl_context ctx,
+ cl_mem_flags flags,
+ cl_mem_type image_type,
+ cl_uint num_entries,
+ cl_image_format *image_formats,
+ cl_uint * num_image_formats)
+{
+ cl_int err = CL_SUCCESS;
+ CHECK_CONTEXT (ctx);
+ if (UNLIKELY(num_entries == 0 && image_formats != NULL)) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+ if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE2D &&
+ image_type != CL_MEM_OBJECT_IMAGE3D)) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+ err = cl_image_get_supported_fmt(ctx,
+ image_type,
+ num_entries,
+ image_formats,
+ num_image_formats);
+
+error:
+ return err;
+}
+
+cl_int
+clGetMemObjectInfo(cl_mem memobj,
+ cl_mem_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clGetImageInfo(cl_mem image,
+ cl_image_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clSetMemObjectDestructorCallback(cl_mem memobj,
+ void (CL_CALLBACK *pfn_notify) (cl_mem, void*),
+ void * user_data)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_sampler
+clCreateSampler(cl_context context,
+ cl_bool normalized,
+ cl_addressing_mode addressing,
+ cl_filter_mode filter,
+ cl_int * errcode_ret)
+{
+ cl_sampler sampler = NULL;
+ cl_int err = CL_SUCCESS;
+ CHECK_CONTEXT (context);
+ sampler = cl_sampler_new(context, normalized, addressing, filter, &err);
+error:
+ if (errcode_ret)
+ *errcode_ret = err;
+ return sampler;
+}
+
+cl_int
+clRetainSampler(cl_sampler sampler)
+{
+ cl_int err = CL_SUCCESS;
+ CHECK_SAMPLER (sampler);
+ cl_sampler_add_ref(sampler);
+error:
+ return err;
+}
+
+cl_int
+clReleaseSampler(cl_sampler sampler)
+{
+ cl_int err = CL_SUCCESS;
+ CHECK_SAMPLER (sampler);
+ cl_sampler_delete(sampler);
+error:
+ return err;
+}
+
+cl_int
+clGetSamplerInfo(cl_sampler sampler,
+ cl_sampler_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_program
+clCreateProgramWithSource(cl_context context,
+ cl_uint count,
+ const char ** strings,
+ const size_t * lengths,
+ cl_int * errcode_ret)
+{
+ cl_program program = NULL;
+ cl_int err = CL_SUCCESS;
+
+ CHECK_CONTEXT (context);
+ program = cl_program_create_from_source(context,
+ count,
+ strings,
+ lengths,
+ &err);
+error:
+ if (errcode_ret)
+ *errcode_ret = err;
+ return program;
+}
+
+cl_program
+clCreateProgramWithBinary(cl_context context,
+ cl_uint num_devices,
+ const cl_device_id * devices,
+ const size_t * lengths,
+ const unsigned char ** binaries,
+ cl_int * binary_status,
+ cl_int * errcode_ret)
+{
+ cl_program program = NULL;
+ cl_int err = CL_SUCCESS;
+
+ CHECK_CONTEXT (context);
+ program = cl_program_create_from_binary(context,
+ num_devices,
+ devices,
+ lengths,
+ binaries,
+ binary_status,
+ &err);
+error:
+ if (errcode_ret)
+ *errcode_ret = err;
+ return program;
+}
+cl_int
+clRetainProgram(cl_program program)
+{
+ cl_int err = CL_SUCCESS;
+ CHECK_PROGRAM (program);
+ cl_program_add_ref(program);
+error:
+ return err;
+}
+
+cl_int
+clReleaseProgram(cl_program program)
+{
+ cl_int err = CL_SUCCESS;
+ CHECK_PROGRAM (program);
+ cl_program_delete(program);
+error:
+ return err;
+}
+
+cl_int
+clBuildProgram(cl_program program,
+ cl_uint num_devices,
+ const cl_device_id * device_list,
+ const char * options,
+ void (CL_CALLBACK *pfn_notify) (cl_program, void*),
+ void * user_data)
+{
+ cl_int err = CL_SUCCESS;
+ CHECK_PROGRAM(program);
+ INVALID_VALUE_IF (num_devices > 1);
+ INVALID_VALUE_IF (num_devices == 0 && device_list != NULL);
+ INVALID_VALUE_IF (num_devices != 0 && device_list == NULL);
+
+ /* Everything is easy. We only support one device anyway */
+ if (num_devices != 0) {
+ assert(program->ctx);
+ if (UNLIKELY(device_list[0] != program->ctx->device)) {
+ err = CL_INVALID_DEVICE;
+ goto error;
+ }
+ }
+
+ /* TODO support create program from binary */
+ assert(program->source_type == FROM_LLVM ||
+ program->source_type == FROM_SOURCE);
+ cl_program_build(program);
+ program->is_built = CL_TRUE;
+
+ if (pfn_notify) pfn_notify(program, user_data);
+
+error:
+ return err;
+}
+
+cl_int
+clUnloadCompiler(void)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clGetProgramInfo(cl_program program,
+ cl_program_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clGetProgramBuildInfo(cl_program program,
+ cl_device_id device,
+ cl_program_build_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_kernel
+clCreateKernel(cl_program program,
+ const char * kernel_name,
+ cl_int * errcode_ret)
+{
+ cl_kernel kernel = NULL;
+ cl_int err = CL_SUCCESS;
+
+ CHECK_PROGRAM (program);
+ if (program->is_built == CL_FALSE) {
+ err = CL_INVALID_PROGRAM_EXECUTABLE;
+ goto error;
+ }
+ kernel = cl_program_create_kernel(program, kernel_name, errcode_ret);
+
+exit:
+ return kernel;
+error:
+ if (errcode_ret)
+ *errcode_ret = err;
+ goto exit;
+}
+
+cl_int
+clCreateKernelsInProgram(cl_program program,
+ cl_uint num_kernels,
+ cl_kernel * kernels,
+ cl_uint * num_kernels_ret)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clRetainKernel(cl_kernel kernel)
+{
+ cl_int err = CL_SUCCESS;
+ CHECK_KERNEL(kernel);
+ cl_kernel_add_ref(kernel);
+error:
+ return err;
+}
+
+cl_int
+clReleaseKernel(cl_kernel kernel)
+{
+ cl_int err = CL_SUCCESS;
+ CHECK_KERNEL(kernel);
+ cl_kernel_delete(kernel);
+error:
+ return err;
+}
+
+cl_int
+clSetKernelArg(cl_kernel kernel,
+ cl_uint arg_index,
+ size_t arg_size,
+ const void * arg_value)
+{
+ cl_int err = CL_SUCCESS;
+ CHECK_KERNEL(kernel);
+ err = cl_kernel_set_arg(kernel, arg_index, arg_size, arg_value);
+error:
+ return err;
+}
+
+cl_int
+clGetKernelInfo(cl_kernel kernel,
+ cl_kernel_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clGetKernelWorkGroupInfo(cl_kernel kernel,
+ cl_device_id device,
+ cl_kernel_work_group_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret)
+{
+ return cl_get_kernel_workgroup_info(device,
+ param_name,
+ param_value_size,
+ param_value,
+ param_value_size_ret);
+}
+
+cl_int
+clWaitForEvents(cl_uint num_events,
+ const cl_event * event_list)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clGetEventInfo(cl_event event,
+ cl_event_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_event
+clCreateUserEvent(cl_context context,
+ cl_int * errcode_ret)
+{
+ NOT_IMPLEMENTED;
+ return NULL;
+}
+
+cl_int
+clRetainEvent(cl_event event)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clReleaseEvent(cl_event event)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clSetUserEventStatus(cl_event event,
+ cl_int execution_status)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clSetEventCallback(cl_event event,
+ cl_int command_exec_callback_type,
+ void (CL_CALLBACK * pfn_notify) (cl_event, cl_int, void *),
+ void * user_data)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clGetEventProfilingInfo(cl_event event,
+ cl_profiling_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clFlush(cl_command_queue command_queue)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clFinish(cl_command_queue command_queue)
+{
+ cl_int err = CL_SUCCESS;
+
+ CHECK_QUEUE (command_queue);
+ err = cl_command_queue_finish(command_queue);
+
+exit:
+ return err;
+error:
+ goto exit;
+}
+
+cl_int
+clEnqueueReadBuffer(cl_command_queue command_queue,
+ cl_mem buffer,
+ cl_bool blocking_read,
+ size_t offset,
+ size_t cb,
+ void * ptr,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event)
+{
+ cl_int err = CL_SUCCESS;
+ assert(ptr != NULL);
+ void* temp_ptr = NULL;
+ temp_ptr = clIntelMapBuffer(buffer, &err);
+ assert(err == CL_SUCCESS);
+ memcpy(ptr, temp_ptr, cb);
+ return err;
+}
+
+cl_int
+clEnqueueReadBufferRect(cl_command_queue command_queue,
+ cl_mem buffer,
+ cl_bool blocking_read,
+ const size_t * buffer_origin,
+ const size_t * host_origin,
+ const size_t * region,
+ size_t buffer_row_pitch,
+ size_t buffer_slice_pitch,
+ size_t host_row_pitch,
+ size_t host_slice_pitch,
+ void * ptr,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clEnqueueWriteBuffer(cl_command_queue command_queue,
+ cl_mem buffer,
+ cl_bool blocking_write,
+ size_t offset,
+ size_t cb,
+ const void * ptr,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clEnqueueWriteBufferRect(cl_command_queue command_queue,
+ cl_mem buffer,
+ cl_bool blocking_write,
+ const size_t * buffer_origin,
+ const size_t * host_origin,
+ const size_t * region,
+ size_t buffer_row_pitch,
+ size_t buffer_slice_pitch,
+ size_t host_row_pitch,
+ size_t host_slice_pitch,
+ const void * ptr,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clEnqueueCopyBuffer(cl_command_queue command_queue,
+ cl_mem src_buffer,
+ cl_mem dst_buffer,
+ size_t src_offset,
+ size_t dst_offset,
+ size_t cb,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clEnqueueCopyBufferRect(cl_command_queue command_queue,
+ cl_mem src_buffer,
+ cl_mem dst_buffer,
+ const size_t * src_origin,
+ const size_t * dst_origin,
+ const size_t * region,
+ size_t src_row_pitch,
+ size_t src_slice_pitch,
+ size_t dst_row_pitch,
+ size_t dst_slice_pitch,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clEnqueueReadImage(cl_command_queue command_queue,
+ cl_mem image,
+ cl_bool blocking_read,
+ const size_t * origin,
+ const size_t * region,
+ size_t row_pitch,
+ size_t slice_pitch,
+ void * ptr,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clEnqueueWriteImage(cl_command_queue command_queue,
+ cl_mem image,
+ cl_bool blocking_write,
+ const size_t * origin,
+ const size_t * region,
+ size_t input_row_pitch,
+ size_t input_slice_pitch,
+ const void * ptr,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clEnqueueCopyImage(cl_command_queue command_queue,
+ cl_mem src_image,
+ cl_mem dst_image,
+ const size_t * src_origin,
+ const size_t * dst_origin,
+ const size_t * region,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clEnqueueCopyImageToBuffer(cl_command_queue command_queue,
+ cl_mem src_image,
+ cl_mem dst_buffer,
+ const size_t * src_origin,
+ const size_t * region,
+ size_t dst_offset,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clEnqueueCopyBufferToImage(cl_command_queue command_queue,
+ cl_mem src_buffer,
+ cl_mem dst_image,
+ size_t src_offset,
+ const size_t * dst_origin,
+ const size_t * region,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+void *
+clEnqueueMapBuffer(cl_command_queue command_queue,
+ cl_mem buffer,
+ cl_bool blocking_map,
+ cl_map_flags map_flags,
+ size_t offset,
+ size_t cb,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event,
+ cl_int * errcode_ret)
+{
+ NOT_IMPLEMENTED;
+ return NULL;
+}
+
+void *
+clEnqueueMapImage(cl_command_queue command_queue,
+ cl_mem image,
+ cl_bool blocking_map,
+ cl_map_flags map_flags,
+ const size_t * origin,
+ const size_t * region,
+ size_t * image_row_pitch,
+ size_t * image_slice_pitch,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event,
+ cl_int * errcode_ret)
+{
+ NOT_IMPLEMENTED;
+ return NULL;
+}
+
+cl_int
+clEnqueueUnmapMemObject(cl_command_queue command_queue,
+ cl_mem memobj,
+ void * mapped_ptr,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clEnqueueNDRangeKernel(cl_command_queue command_queue,
+ cl_kernel kernel,
+ cl_uint work_dim,
+ const size_t * global_work_offset,
+ const size_t * global_work_size,
+ const size_t * local_work_size,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event)
+{
+ size_t fixed_global_off[] = {0,0,0};
+ size_t fixed_global_sz[] = {1,1,1};
+ size_t fixed_local_sz[] = {16,1,1};
+ cl_int err = CL_SUCCESS;
+ size_t i;
+
+ CHECK_QUEUE(command_queue);
+ CHECK_KERNEL(kernel);
+
+ /* Check number of dimensions we have */
+ if (UNLIKELY(work_dim == 0 || work_dim > 3)) {
+ err = CL_INVALID_WORK_DIMENSION;
+ goto error;
+ }
+
+ /* We need a work size per dimension */
+ if (UNLIKELY(global_work_size == NULL)) {
+ err = CL_INVALID_GLOBAL_WORK_SIZE;
+ goto error;
+ }
+
+ /* Local size must be non-null */
+ for (i = 0; i < work_dim; ++i)
+ if (UNLIKELY(local_work_size[i] == 0)) {
+ err = CL_INVALID_WORK_GROUP_SIZE;
+ goto error;
+ }
+
+ /* Check offset values. We add a non-standard restriction: the offsets must
+ * also be evenly divisible by the local sizes
+ */
+ if (global_work_offset != NULL)
+ for (i = 0; i < work_dim; ++i) {
+ if (UNLIKELY(~0LL - global_work_offset[i] < global_work_size[i])) {
+ err = CL_INVALID_GLOBAL_OFFSET;
+ goto error;
+ }
+ if (UNLIKELY(global_work_offset[i] % local_work_size[i])) {
+ err = CL_INVALID_GLOBAL_OFFSET;
+ goto error;
+ }
+ }
+
+ /* Local sizes must divide global sizes */
+ if (local_work_size != NULL)
+ for (i = 0; i < work_dim; ++i)
+ if (UNLIKELY(global_work_size[i] % local_work_size[i])) {
+ err = CL_INVALID_WORK_GROUP_SIZE;
+ goto error;
+ }
+
+ /* Queue and kernel must share the same context */
+ assert(kernel->program);
+ if (command_queue->ctx != kernel->program->ctx) {
+ err = CL_INVALID_CONTEXT;
+ goto error;
+ }
+
+ /* XXX No event right now */
+ FATAL_IF(num_events_in_wait_list > 0, "Events are not supported");
+ FATAL_IF(event_wait_list != NULL, "Events are not supported");
+ FATAL_IF(event != NULL, "Events are not supported");
+
+ if (local_work_size != NULL)
+ for (i = 0; i < work_dim; ++i)
+ fixed_local_sz[i] = local_work_size[i];
+ if (global_work_size != NULL)
+ for (i = 0; i < work_dim; ++i)
+ fixed_global_sz[i] = global_work_size[i];
+ if (global_work_offset != NULL)
+ for (i = 0; i < work_dim; ++i)
+ fixed_global_off[i] = global_work_offset[i];
+
+ /* Do device-specific checks and enqueue the kernel */
+ err = cl_command_queue_ND_range(command_queue,
+ kernel,
+ fixed_global_off,
+ fixed_global_sz,
+ fixed_local_sz);
+
+error:
+ return err;
+}
+
+cl_int
+clEnqueueTask(cl_command_queue command_queue,
+ cl_kernel kernel,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clEnqueueNativeKernel(cl_command_queue command_queue,
+ void (*user_func)(void *),
+ void * args,
+ size_t cb_args,
+ cl_uint num_mem_objects,
+ const cl_mem * mem_list,
+ const void ** args_mem_loc,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clEnqueueMarker(cl_command_queue command_queue,
+ cl_event * event)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clEnqueueWaitForEvents(cl_command_queue command_queue,
+ cl_uint num_events,
+ const cl_event * event_list)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+cl_int
+clEnqueueBarrier(cl_command_queue command_queue)
+{
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+void*
+clGetExtensionFunctionAddress(const char *func_name)
+{
+ NOT_IMPLEMENTED;
+ return NULL;
+}
+
+cl_int
+clIntelReportUnfreed(void)
+{
+ return cl_report_unfreed();
+}
+
+void*
+clIntelMapBuffer(cl_mem mem, cl_int *errcode_ret)
+{
+ void *ptr = NULL;
+ cl_int err = CL_SUCCESS;
+ CHECK_MEM (mem);
+ ptr = cl_mem_map(mem);
+error:
+ if (errcode_ret)
+ *errcode_ret = err;
+ return ptr;
+}
+
+cl_int
+clIntelUnmapBuffer(cl_mem mem)
+{
+ cl_int err = CL_SUCCESS;
+ CHECK_MEM (mem);
+ err = cl_mem_unmap(mem);
+error:
+ return err;
+}
+
+cl_int
+clIntelPinBuffer(cl_mem mem)
+{
+ cl_int err = CL_SUCCESS;
+ CHECK_MEM (mem);
+ cl_mem_pin(mem);
+error:
+ return err;
+}
+
+cl_int
+clIntelUnpinBuffer(cl_mem mem)
+{
+ cl_int err = CL_SUCCESS;
+ CHECK_MEM (mem);
+ cl_mem_unpin(mem);
+error:
+ return err;
+}
+
+cl_int
+clIntelGetGenVersion(cl_device_id device, cl_int *ver)
+{
+ return cl_device_get_version(device, ver);
+}
+
+cl_program
+clCreateProgramWithLLVM(cl_context context,
+ cl_uint num_devices,
+ const cl_device_id * devices,
+ const char * filename,
+ cl_int * errcode_ret)
+{
+ return cl_program_create_from_llvm(context,
+ num_devices,
+ devices,
+ filename,
+ errcode_ret);
+}
+
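Most entry points above are thin forwarders into the cl_* helpers, validated with the CHECK_*/goto error pattern, and the implemented subset is already enough for a complete submit path. A host-side sketch that touches only implemented calls (the kernel name "test_copy_buffer", the buffer sizes and the work sizes are placeholders):

    #include <assert.h>
    #include "CL/cl.h"

    /* Sketch of a minimal submit path through the entry points implemented
     * above; error handling is reduced to a couple of asserts. */
    static void run_copy_kernel(const char *source)
    {
      cl_int err;
      cl_platform_id platform;
      cl_device_id device;
      float src[256], dst[256];

      err = clGetPlatformIDs(1, &platform, NULL);
      assert(err == CL_SUCCESS);
      err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
      assert(err == CL_SUCCESS);

      cl_context ctx = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
      cl_command_queue queue = clCreateCommandQueue(ctx, device, 0, &err);
      cl_program program = clCreateProgramWithSource(ctx, 1, &source, NULL, &err);
      err = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
      cl_kernel kernel = clCreateKernel(program, "test_copy_buffer", &err);

      cl_mem src_buf = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR, sizeof(src), src, &err);
      cl_mem dst_buf = clCreateBuffer(ctx, 0, sizeof(dst), NULL, &err);
      clSetKernelArg(kernel, 0, sizeof(cl_mem), &src_buf);
      clSetKernelArg(kernel, 1, sizeof(cl_mem), &dst_buf);

      size_t global = 256, local = 16;  /* local must divide global (see checks above) */
      err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
      err = clFinish(queue);
      err = clEnqueueReadBuffer(queue, dst_buf, CL_TRUE, 0, sizeof(dst), dst, 0, NULL, NULL);

      clReleaseMemObject(src_buf);
      clReleaseMemObject(dst_buf);
      clReleaseKernel(kernel);
      clReleaseProgram(program);
      clReleaseCommandQueue(queue);
      clReleaseContext(ctx);
    }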
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
new file mode 100644
index 0000000..d32f284
--- /dev/null
+++ b/src/cl_command_queue.c
@@ -0,0 +1,372 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "cl_command_queue.h"
+#include "cl_context.h"
+#include "cl_program.h"
+#include "cl_kernel.h"
+#include "cl_device_id.h"
+#include "cl_mem.h"
+#include "cl_utils.h"
+#include "cl_alloc.h"
+#include "cl_driver.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+LOCAL cl_command_queue
+cl_command_queue_new(cl_context ctx)
+{
+ cl_command_queue queue = NULL;
+
+ assert(ctx);
+ TRY_ALLOC_NO_ERR (queue, CALLOC(struct _cl_command_queue));
+ queue->magic = CL_MAGIC_QUEUE_HEADER;
+ queue->ref_n = 1;
+ queue->ctx = ctx;
+ TRY_ALLOC_NO_ERR (queue->gpgpu, cl_gpgpu_new(ctx->drv));
+
+ /* Append the command queue in the list */
+ pthread_mutex_lock(&ctx->queue_lock);
+ queue->next = ctx->queues;
+ if (ctx->queues != NULL)
+ ctx->queues->prev = queue;
+ ctx->queues = queue;
+ pthread_mutex_unlock(&ctx->queue_lock);
+
+ /* The queue also belongs to its context */
+ cl_context_add_ref(ctx);
+
+exit:
+ return queue;
+error:
+ cl_command_queue_delete(queue);
+ queue = NULL;
+ goto exit;
+}
+
+LOCAL void
+cl_command_queue_delete(cl_command_queue queue)
+{
+ assert(queue);
+ if (atomic_dec(&queue->ref_n) != 1) return;
+
+ /* Remove it from the list */
+ assert(queue->ctx);
+ pthread_mutex_lock(&queue->ctx->queue_lock);
+ if (queue->prev)
+ queue->prev->next = queue->next;
+ if (queue->next)
+ queue->next->prev = queue->prev;
+ if (queue->next == NULL && queue->prev == NULL)
+ queue->ctx->queues = NULL;
+ pthread_mutex_unlock(&queue->ctx->queue_lock);
+ if (queue->fulsim_out != NULL) {
+ cl_mem_delete(queue->fulsim_out);
+ queue->fulsim_out = NULL;
+ }
+ cl_buffer_unreference(queue->last_batch);
+ cl_mem_delete(queue->perf);
+ cl_context_delete(queue->ctx);
+ cl_gpgpu_delete(queue->gpgpu);
+ queue->magic = CL_MAGIC_DEAD_HEADER; /* For safety */
+ cl_free(queue);
+}
+
+LOCAL void
+cl_command_queue_add_ref(cl_command_queue queue)
+{
+ atomic_inc(&queue->ref_n);
+}
+
+LOCAL cl_int
+cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
+{
+ /* Bind all user buffers (given by clSetKernelArg) */
+ uint32_t i;
+ for (i = 0; i < k->arg_n; ++i) {
+ uint32_t offset; // location of the address in the curbe
+ if (gbe_kernel_get_arg_type(k->opaque, i) != GBE_ARG_GLOBAL_PTR &&
+ gbe_kernel_get_arg_type(k->opaque, i) != GBE_ARG_CONSTANT_PTR)
+ continue;
+ offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, i);
+ cl_gpgpu_bind_buf(queue->gpgpu, k->args[i].mem->bo, offset, cc_llc_l3);
+ }
+
+ return CL_SUCCESS;
+}
+
+#if USE_FULSIM
+extern void drm_intel_bufmgr_gem_stop_aubfile(cl_buffer_mgr);
+extern void drm_intel_bufmgr_gem_set_aubfile(cl_buffer_mgr, FILE*);
+extern void aub_exec_dump_raw_file(cl_buffer, size_t offset, size_t sz);
+
+static void
+cl_run_fulsim(void)
+{
+ const char *run_it = getenv("OCL_SIMULATOR");
+ const char *debug_mode = getenv("OCL_FULSIM_DEBUG_MODE");
+ if (run_it == NULL || strcmp(run_it, "1")) return;
+
+#if EMULATE_GEN == 7 /* IVB */
+ if (debug_mode == NULL || strcmp(debug_mode, "1"))
+ system("wine AubLoad.exe dump.aub -device ivbB0");
+ else
+ system("wine AubLoad.exe dump.aub -device ivbB0 -debug");
+#elif EMULATE_GEN == 75 /* HSW */
+ if (debug_mode == NULL || strcmp(debug_mode, "1"))
+ system("wine AubLoad.exe dump.aub -device hsw.h.a0");
+ else
+ system("wine AubLoad.exe dump.aub -device hsw.h.a0 -debug");
+#else
+#error "Unknown device"
+#endif
+}
+
+/* Each buffer is dumped using several chunks of this size */
+static const size_t chunk_sz = 8192u;
+
+static cl_int
+cl_fulsim_dump_all_surfaces(cl_command_queue queue, cl_kernel k)
+{
+ cl_int err = CL_SUCCESS;
+ cl_mem mem = NULL;
+ int i;
+ size_t j;
+
+ /* Dump each user-defined global buffer in chunks */
+ for (i = 0; i < k->arg_n; ++i) {
+ size_t chunk_n, chunk_remainder;
+ if (gbe_kernel_get_arg_type(k->opaque, i) != GBE_ARG_GLOBAL_PTR)
+ continue;
+ mem = (cl_mem) k->args[i].mem;
+ CHECK_MEM(mem);
+ chunk_n = cl_buffer_get_size(mem->bo) / chunk_sz;
+ chunk_remainder = cl_buffer_get_size(mem->bo) % chunk_sz;
+ for (j = 0; j < chunk_n; ++j)
+ aub_exec_dump_raw_file(mem->bo, j * chunk_sz, chunk_sz);
+ if (chunk_remainder)
+ aub_exec_dump_raw_file(mem->bo, chunk_n * chunk_sz, chunk_remainder);
+ }
+error:
+ return err;
+}
+
+struct bmphdr {
+ /* 2 bytes of magic here, "BM", total header size is 54 bytes! */
+ int filesize; /* 4 total file size incl header */
+ short as0, as1; /* 8 app specific */
+ int bmpoffset; /* 12 offset of bmp data */
+ int headerbytes; /* 16 bytes in header from this point (40 actually) */
+ int width; /* 20 */
+ int height; /* 24 */
+ short nplanes; /* 26 no of color planes */
+ short bpp; /* 28 bits/pixel */
+ int compression; /* 32 BI_RGB = 0 = no compression */
+ int sizeraw; /* 36 size of raw bmp file, excluding header, incl padding */
+ int hres; /* 40 horizontal resolution, pixels/meter */
+ int vres; /* 44 */
+ int npalcolors; /* 48 No of colors in palette */
+ int nimportant; /* 52 No of important colors */
+ /* raw b, g, r data here, dword aligned per scan line */
+};
+
+static int*
+cl_read_bmp(const char *filename, int *width, int *height)
+{
+ int n;
+ struct bmphdr hdr;
+
+ FILE *fp = fopen(filename, "rb");
+ assert(fp);
+
+ char magic[2];
+ n = fread(&magic[0], 1, 2, fp);
+ assert(n == 2 && magic[0] == 'B' && magic[1] == 'M');
+
+ n = fread(&hdr, 1, sizeof(hdr), fp);
+ assert(n == sizeof(hdr));
+
+ assert(hdr.width > 0 &&
+ hdr.height > 0 &&
+ hdr.nplanes == 1
+ && hdr.compression == 0);
+
+ int *rgb32 = (int *) cl_malloc(hdr.width * hdr.height * sizeof(int));
+ assert(rgb32);
+ int x, y;
+
+ int *dst = rgb32;
+ for (y = 0; y < hdr.height; y++) {
+ for (x = 0; x < hdr.width; x++) {
+ assert(!feof(fp));
+ int b = (getc(fp) & 0x0ff);
+ int g = (getc(fp) & 0x0ff);
+ int r = (getc(fp) & 0x0ff);
+ *dst++ = (r | (g << 8) | (b << 16) | 0xff000000); /* abgr */
+ }
+ while (x & 3) {
+ getc(fp);
+ x++;
+ }
+ }
+ fclose(fp);
+ *width = hdr.width;
+ *height = hdr.height;
+ return rgb32;
+}
+
+static char*
+cl_read_dump(const char *name, size_t *size)
+{
+ char *raw = NULL, *dump = NULL;
+ size_t i, sz;
+ int w, h;
+ if ((raw = (char*) cl_read_bmp(name, &w, &h)) == NULL)
+ return NULL;
+ sz = w * h;
+ dump = (char*) cl_malloc(sz);
+ assert(dump);
+ for (i = 0; i < sz; ++i)
+ dump[i] = raw[4*i];
+ cl_free(raw);
+ if (size)
+ *size = sz;
+ return dump;
+}
+
+static cl_int
+cl_fulsim_read_all_surfaces(cl_command_queue queue, cl_kernel k)
+{
+ cl_int err = CL_SUCCESS;
+ cl_mem mem = NULL;
+ char *from = NULL, *to = NULL;
+ size_t size, j, chunk_n, chunk_remainder;
+ int i, curr = 0;
+ /* Read back each user-defined global buffer in chunks */
+ for (i = 0; i < k->arg_n; ++i) {
+ if (gbe_kernel_get_arg_type(k->opaque, i) != GBE_ARG_GLOBAL_PTR)
+ continue;
+ mem = (cl_mem) k->args[i].mem;
+ CHECK_MEM(mem);
+ assert(mem->bo);
+ chunk_n = cl_buffer_get_size(mem->bo) / chunk_sz;
+ chunk_remainder = cl_buffer_get_size(mem->bo) % chunk_sz;
+ to = cl_mem_map(mem);
+ for (j = 0; j < chunk_n; ++j) {
+ char name[256];
+ sprintf(name, "dump%03i.bmp", curr);
+#ifdef NDEBUG
+ from = cl_read_dump(name, NULL);
+#else
+ from = cl_read_dump(name, &size);
+ assert(size == chunk_sz);
+#endif /* NDEBUG */
+ memcpy(to + j*chunk_sz, from, chunk_sz);
+ cl_free(from);
+ curr++;
+ }
+ if (chunk_remainder) {
+ char name[256];
+ sprintf(name, "dump%03i.bmp", curr);
+#ifdef NDEBUG
+ from = cl_read_dump(name, NULL);
+#else
+ from = cl_read_dump(name, &size);
+ assert(size == chunk_remainder);
+#endif /* NDEBUG */
+ memcpy(to + chunk_n*chunk_sz, from, chunk_remainder);
+ cl_free(from);
+ curr++;
+ }
+ cl_mem_unmap(mem);
+ }
+error:
+ return err;
+}
+#endif
+
+extern cl_int cl_command_queue_ND_range_gen7(cl_command_queue, cl_kernel, const size_t *, const size_t *, const size_t *);
+
+static cl_int
+cl_kernel_check_args(cl_kernel k)
+{
+ uint32_t i;
+ for (i = 0; i < k->arg_n; ++i)
+ if (k->args[i].is_set == CL_FALSE)
+ return CL_INVALID_KERNEL_ARGS;
+ return CL_SUCCESS;
+}
+
+LOCAL cl_int
+cl_command_queue_ND_range(cl_command_queue queue,
+ cl_kernel k,
+ const size_t *global_wk_off,
+ const size_t *global_wk_sz,
+ const size_t *local_wk_sz)
+{
+ const int32_t ver = cl_driver_get_ver(queue->ctx->drv);
+ cl_int err = CL_SUCCESS;
+
+ /* Check that the user did not forget any argument */
+ TRY (cl_kernel_check_args, k);
+
+#if USE_FULSIM
+ cl_buffer_mgr bufmgr = NULL;
+ FILE *file = NULL;
+ const char *run_it = getenv("OCL_SIMULATOR");
+ if (run_it != NULL && strcmp(run_it, "1") == 0) {
+ file = fopen("dump.aub", "wb");
+ FATAL_IF (file == NULL, "Unable to open file dump.aub");
+ bufmgr = cl_context_get_bufmgr(queue->ctx);
+ drm_intel_bufmgr_gem_set_aubfile(bufmgr, file);
+ }
+#endif /* USE_FULSIM */
+
+ if (ver == 7 || ver == 75)
+ TRY (cl_command_queue_ND_range_gen7, queue, k, global_wk_off, global_wk_sz, local_wk_sz);
+ else
+ FATAL ("Unknown Gen Device");
+
+#if USE_FULSIM
+ if (run_it != NULL && strcmp(run_it, "1") == 0) {
+ TRY (cl_fulsim_dump_all_surfaces, queue, k);
+ drm_intel_bufmgr_gem_stop_aubfile(bufmgr);
+ fclose(file);
+ cl_run_fulsim();
+ TRY (cl_fulsim_read_all_surfaces, queue, k);
+ }
+#endif /* USE_FULSIM */
+
+error:
+ return err;
+}
+
+LOCAL cl_int
+cl_command_queue_finish(cl_command_queue queue)
+{
+ if (queue->last_batch == NULL)
+ return CL_SUCCESS;
+ cl_buffer_wait_rendering(queue->last_batch);
+ cl_buffer_unreference(queue->last_batch);
+ queue->last_batch = NULL;
+ return CL_SUCCESS;
+}
+
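In fulsim mode every global buffer is dumped in 8 KB chunks, AubLoad is run under wine, and the results come back as one dump%03i.bmp file per chunk, which cl_fulsim_read_all_surfaces copies back into the mapped buffers. A small sketch of the chunk bookkeeping shared by the dump and readback paths (the file naming is copied from the code above; printing is for illustration only):

    #include <stdio.h>

    /* Sketch: list the dump%03i.bmp files cl_fulsim_read_all_surfaces expects
     * for one buffer, using the same 8192-byte chunk size as above. */
    static void list_fulsim_chunks(size_t buffer_sz, int first_index)
    {
      const size_t chunk_sz = 8192u;
      size_t chunk_n = buffer_sz / chunk_sz;
      size_t remainder = buffer_sz % chunk_sz;
      int curr = first_index;

      for (size_t j = 0; j < chunk_n; ++j, ++curr)
        printf("dump%03i.bmp: offset %zu, size %zu\n", curr, j * chunk_sz, chunk_sz);
      if (remainder)
        printf("dump%03i.bmp: offset %zu, size %zu\n", curr, chunk_n * chunk_sz, remainder);
    }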
diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h
new file mode 100644
index 0000000..1e2bcc1
--- /dev/null
+++ b/src/cl_command_queue.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __CL_COMMAND_QUEUE_H__
+#define __CL_COMMAND_QUEUE_H__
+
+#include "cl_internals.h"
+#include "cl_driver.h"
+#include "CL/cl.h"
+#include <stdint.h>
+
+struct intel_gpgpu;
+
+/* Basically, this is a (kind-of) batch buffer */
+struct _cl_command_queue {
+ uint64_t magic; /* To identify it as a command queue */
+ volatile int ref_n; /* We reference count this object */
+ cl_context ctx; /* Its parent context */
+ cl_command_queue prev, next; /* We chain the command queues together */
+ cl_gpgpu gpgpu; /* Setup all GEN commands */
+ cl_mem perf; /* Where to put the perf counters */
+ cl_mem fulsim_out; /* Fulsim will output this buffer */
+ cl_buffer last_batch; /* To synchronize using clFinish */
+};
+
+/* Allocate and initialize a new command queue. Also insert it in the list of
+ * command queues of the associated context
+ */
+extern cl_command_queue cl_command_queue_new(cl_context);
+
+/* Destroy and deallocate the command queue */
+extern void cl_command_queue_delete(cl_command_queue);
+
+/* Keep one more reference on the queue */
+extern void cl_command_queue_add_ref(cl_command_queue);
+
+/* Map ND range kernel from OCL API */
+extern cl_int cl_command_queue_ND_range(cl_command_queue queue,
+ cl_kernel ker,
+ const size_t *global_work_offset,
+ const size_t *global_work_size,
+ const size_t *local_work_size);
+
+/* The memory object where to report the performance */
+extern cl_int cl_command_queue_set_report_buffer(cl_command_queue, cl_mem);
+
+/* Fulsim will dump this buffer (mostly to check its consistency) */
+cl_int cl_command_queue_set_fulsim_buffer(cl_command_queue, cl_mem);
+
+/* Wait for the completion of the command queue */
+extern cl_int cl_command_queue_finish(cl_command_queue);
+
+/* Bind all the surfaces in the GPGPU state */
+extern cl_int cl_command_queue_bind_surface(cl_command_queue, cl_kernel);
+
+#endif /* __CL_COMMAND_QUEUE_H__ */
+
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
new file mode 100644
index 0000000..3a590bc
--- /dev/null
+++ b/src/cl_command_queue_gen7.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "cl_command_queue.h"
+#include "cl_context.h"
+#include "cl_program.h"
+#include "cl_kernel.h"
+#include "cl_device_id.h"
+#include "cl_mem.h"
+#include "cl_utils.h"
+#include "cl_alloc.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+static INLINE size_t cl_kernel_compute_batch_sz(cl_kernel k) { return 256+32; }
+
+/* "Varing" payload is the part of the curbe that changes accross threads in the
+ * same work group. Right now, it consists in local IDs and block IPs
+ */
+static cl_int
+cl_set_varying_payload(const cl_kernel ker,
+ char *data,
+ const size_t *local_wk_sz,
+ size_t simd_sz,
+ size_t cst_sz,
+ size_t thread_n)
+{
+ uint32_t *ids[3] = {NULL,NULL,NULL};
+ uint16_t *block_ips = NULL;
+ size_t i, j, k, curr = 0;
+ int32_t id_offset[3], ip_offset;
+ cl_int err = CL_SUCCESS;
+
+ id_offset[0] = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_X, 0);
+ id_offset[1] = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_Y, 0);
+ id_offset[2] = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_Z, 0);
+ ip_offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_BLOCK_IP, 0);
+ assert(id_offset[0] >= 0 &&
+ id_offset[1] >= 0 &&
+ id_offset[2] >= 0 &&
+ ip_offset >= 0);
+
+ TRY_ALLOC(ids[0], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz));
+ TRY_ALLOC(ids[1], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz));
+ TRY_ALLOC(ids[2], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz));
+ TRY_ALLOC(block_ips, (uint16_t*) alloca(sizeof(uint16_t)*thread_n*simd_sz));
+
+ /* 0xffff means that the lane is inactive */
+ memset(block_ips, 0xff, sizeof(uint16_t)*thread_n*simd_sz);
+
+ /* Compute the IDs and the block IPs */
+ for (k = 0; k < local_wk_sz[2]; ++k)
+ for (j = 0; j < local_wk_sz[1]; ++j)
+ for (i = 0; i < local_wk_sz[0]; ++i, ++curr) {
+ ids[0][curr] = i;
+ ids[1][curr] = j;
+ ids[2][curr] = k;
+ block_ips[curr] = 0;
+ }
+
+ /* Copy them to the constant buffer */
+ curr = 0;
+ for (i = 0; i < thread_n; ++i, data += cst_sz) {
+ uint32_t *ids0 = (uint32_t *) (data + id_offset[0]);
+ uint32_t *ids1 = (uint32_t *) (data + id_offset[1]);
+ uint32_t *ids2 = (uint32_t *) (data + id_offset[2]);
+ uint16_t *ips = (uint16_t *) (data + ip_offset);
+ for (j = 0; j < simd_sz; ++j, ++curr) {
+ ids0[j] = ids[0][curr];
+ ids1[j] = ids[1][curr];
+ ids2[j] = ids[2][curr];
+ ips[j] = block_ips[curr];
+ }
+ }
+
+error:
+ return err;
+}
+
+/* Will return the total amount of slm used */
+static int32_t
+cl_curbe_fill(cl_kernel ker,
+ const size_t *global_wk_off,
+ const size_t *global_wk_sz,
+ const size_t *local_wk_sz,
+ size_t thread_n)
+{
+ int32_t offset;
+#define UPLOAD(ENUM, VALUE) \
+ if ((offset = gbe_kernel_get_curbe_offset(ker->opaque, ENUM, 0)) >= 0) \
+ *((uint32_t *) (ker->curbe + offset)) = VALUE;
+ UPLOAD(GBE_CURBE_LOCAL_SIZE_X, local_wk_sz[0]);
+ UPLOAD(GBE_CURBE_LOCAL_SIZE_Y, local_wk_sz[1]);
+ UPLOAD(GBE_CURBE_LOCAL_SIZE_Z, local_wk_sz[2]);
+ UPLOAD(GBE_CURBE_GLOBAL_SIZE_X, global_wk_sz[0]);
+ UPLOAD(GBE_CURBE_GLOBAL_SIZE_Y, global_wk_sz[1]);
+ UPLOAD(GBE_CURBE_GLOBAL_SIZE_Z, global_wk_sz[2]);
+ UPLOAD(GBE_CURBE_GLOBAL_OFFSET_X, global_wk_off[0]);
+ UPLOAD(GBE_CURBE_GLOBAL_OFFSET_Y, global_wk_off[1]);
+ UPLOAD(GBE_CURBE_GLOBAL_OFFSET_Z, global_wk_off[2]);
+ UPLOAD(GBE_CURBE_GROUP_NUM_X, global_wk_sz[0]/local_wk_sz[0]);
+ UPLOAD(GBE_CURBE_GROUP_NUM_Y, global_wk_sz[1]/local_wk_sz[1]);
+ UPLOAD(GBE_CURBE_GROUP_NUM_Z, global_wk_sz[2]/local_wk_sz[2]);
+ UPLOAD(GBE_CURBE_THREAD_NUM, thread_n);
+#undef UPLOAD
+
+ /* Write identity for the stack pointer. This is required by the stack pointer
+ * computation in the kernel
+ */
+ if ((offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_STACK_POINTER, 0)) >= 0) {
+ const uint32_t simd_sz = gbe_kernel_get_simd_width(ker->opaque);
+ uint32_t *stackptr = (uint32_t *) (ker->curbe + offset);
+ int32_t i;
+ for (i = 0; i < (int32_t) simd_sz; ++i) stackptr[i] = i;
+ }
+
+ /* Handle the various offsets to SLM */
+ const int32_t arg_n = gbe_kernel_get_arg_num(ker->opaque);
+ int32_t arg, slm_offset = 0;
+ for (arg = 0; arg < arg_n; ++arg) {
+ const enum gbe_arg_type type = gbe_kernel_get_arg_type(ker->opaque, arg);
+ if (type != GBE_ARG_LOCAL_PTR)
+ continue;
+ offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_KERNEL_ARGUMENT, arg);
+ assert(offset >= 0);
+ uint32_t *slmptr = (uint32_t *) (ker->curbe + offset);
+ *slmptr = slm_offset;
+ slm_offset += ker->args[arg].local_sz;
+ }
+
+ return slm_offset;
+}
+
+static void
+cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker)
+{
+ cl_context ctx = ker->program->ctx;
+ cl_device_id device = ctx->device;
+ const int32_t per_lane_stack_sz = gbe_kernel_get_stack_size(ker->opaque);
+ const int32_t value = GBE_CURBE_EXTRA_ARGUMENT;
+ const int32_t sub_value = GBE_STACK_BUFFER;
+ const int32_t offset = gbe_kernel_get_curbe_offset(ker->opaque, value, sub_value);
+ int32_t stack_sz = per_lane_stack_sz;
+
+ /* No stack required for this kernel */
+ if (per_lane_stack_sz == 0)
+ return;
+
+ /* The stack size is given for *each* SIMD lane. So, we accordingly compute
+ * the size we need for the complete machine
+ */
+ assert(offset >= 0);
+ stack_sz *= gbe_kernel_get_simd_width(ker->opaque);
+ stack_sz *= device->max_compute_unit;
+ stack_sz *= device->max_thread_per_unit;
+ cl_gpgpu_set_stack(gpgpu, offset, stack_sz, cc_llc_l3);
+}
+
+LOCAL cl_int
+cl_command_queue_ND_range_gen7(cl_command_queue queue,
+ cl_kernel ker,
+ const size_t *global_wk_off,
+ const size_t *global_wk_sz,
+ const size_t *local_wk_sz)
+{
+ cl_context ctx = queue->ctx;
+ cl_gpgpu gpgpu = queue->gpgpu;
+ char *final_curbe = NULL; /* One curbe copy per HW thread (constants + varying payload) */
+ cl_gpgpu_kernel kernel;
+ const uint32_t simd_sz = cl_kernel_get_simd_width(ker);
+ size_t i, batch_sz = 0u, local_sz = 0u, cst_sz = ker->curbe_sz;
+ size_t thread_n = 0u;
+ cl_int err = CL_SUCCESS;
+
+ /* Setup kernel */
+ kernel.name = "KERNEL";
+ kernel.grf_blocks = 128;
+ kernel.bo = ker->bo;
+ kernel.barrierID = 0;
+ kernel.slm_sz = 0;
+ kernel.use_slm = gbe_kernel_use_slm(ker->opaque);
+
+ /* Compute the number of HW threads we need */
+ TRY (cl_kernel_work_group_sz, ker, local_wk_sz, 3, &local_sz);
+ kernel.thread_n = thread_n = local_sz / simd_sz;
+ kernel.cst_sz = cst_sz;
+
+ /* Curbe step 1: fill the constant buffer data shared by all threads */
+ if (ker->curbe)
+ kernel.slm_sz = cl_curbe_fill(ker, global_wk_off, global_wk_sz, local_wk_sz, thread_n);
+
+ /* Setup the kernel */
+ cl_gpgpu_state_init(gpgpu, ctx->device->max_compute_unit, cst_sz / 32);
+ if (queue->last_batch != NULL)
+ cl_buffer_unreference(queue->last_batch);
+ queue->last_batch = NULL;
+
+ /* Bind user buffers */
+ cl_command_queue_bind_surface(queue, ker);
+
+ /* Bind a stack if needed */
+ cl_bind_stack(gpgpu, ker);
+ cl_gpgpu_states_setup(gpgpu, &kernel);
+
+ /* Curbe step 2: set the local IDs and upload the curbe to video memory */
+ if (ker->curbe) {
+ assert(cst_sz > 0);
+ TRY_ALLOC (final_curbe, (char*) alloca(thread_n * cst_sz));
+ for (i = 0; i < thread_n; ++i)
+ memcpy(final_curbe + cst_sz * i, ker->curbe, cst_sz);
+ TRY (cl_set_varying_payload, ker, final_curbe, local_wk_sz, simd_sz, cst_sz, thread_n);
+ cl_gpgpu_upload_constants(gpgpu, final_curbe, thread_n*cst_sz);
+ }
+
+ /* Start a new batch buffer */
+ batch_sz = cl_kernel_compute_batch_sz(ker);
+ cl_gpgpu_batch_reset(gpgpu, batch_sz);
+ cl_gpgpu_batch_start(gpgpu);
+
+ /* Issue the GPGPU_WALKER command */
+ cl_gpgpu_walker(gpgpu, simd_sz, thread_n, global_wk_off, global_wk_sz, local_wk_sz);
+
+ /* Close the batch buffer and submit it */
+ cl_gpgpu_batch_end(gpgpu, 0);
+ cl_gpgpu_flush(gpgpu);
+
+error:
+ return err;
+}
+
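The curbe is duplicated once per HW thread, and each copy then receives its own varying payload: the local IDs laid out lane by lane plus a per-lane block IP. A standalone 1-D sketch of that layout, with made-up offsets standing in for the gbe_kernel_get_curbe_offset() lookups:

    #include <stdint.h>

    /* 1-D illustration of cl_set_varying_payload: each of the thread_n curbe
     * copies holds simd_sz local IDs and simd_sz block IPs. Offsets 0 and 64
     * are placeholders for the real curbe offsets queried from the compiler. */
    static void fill_varying_payload_1d(char *curbe, size_t cst_sz,
                                        size_t simd_sz, size_t thread_n)
    {
      const size_t id_offset = 0, ip_offset = 64;  /* assumed layout */
      size_t lid = 0;
      for (size_t t = 0; t < thread_n; ++t, curbe += cst_sz) {
        uint32_t *ids = (uint32_t *) (curbe + id_offset);
        uint16_t *ips = (uint16_t *) (curbe + ip_offset);
        for (size_t j = 0; j < simd_sz; ++j, ++lid) {
          ids[j] = (uint32_t) lid;  /* local ID X for this lane */
          ips[j] = 0;               /* all lanes start at block 0 */
        }
      }
    }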
diff --git a/src/cl_context.c b/src/cl_context.c
new file mode 100644
index 0000000..385c733
--- /dev/null
+++ b/src/cl_context.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "cl_platform_id.h"
+#include "cl_device_id.h"
+#include "cl_context.h"
+#include "cl_command_queue.h"
+#include "cl_mem.h"
+#include "cl_alloc.h"
+#include "cl_utils.h"
+#include "cl_driver.h"
+
+#include "CL/cl.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <assert.h>
+
+static cl_int
+cl_context_properties_is_ok(const cl_context_properties *properties)
+{
+ const cl_context_properties *prop = properties;
+ size_t prop_n = 0;
+ cl_int err = CL_SUCCESS;
+
+ if (properties == NULL)
+ goto exit;
+ while (*prop) {
+ prop += 2;
+ prop_n++;
+ }
+
+ /* XXX */
+ FATAL_IF (prop_n > 1, "Only one property is supported now");
+ INVALID_VALUE_IF (*properties != CL_CONTEXT_PLATFORM);
+ if (UNLIKELY((cl_platform_id) properties[1] != intel_platform)) {
+ err = CL_INVALID_PLATFORM;
+ goto error;
+ }
+
+exit:
+error:
+ return err;
+}
+
+static cl_int
+cl_device_id_is_ok(const cl_device_id device)
+{
+ return device != cl_get_gt_device() ? CL_FALSE : CL_TRUE;
+}
+
+LOCAL cl_context
+cl_create_context(const cl_context_properties * properties,
+ cl_uint num_devices,
+ const cl_device_id * devices,
+ void (CL_CALLBACK * pfn_notify) (const char*, const void*, size_t, void*),
+ void * user_data,
+ cl_int * errcode_ret)
+{
+ /* cl_platform_id platform = NULL; */
+ cl_context ctx = NULL;
+ cl_int err = CL_SUCCESS;
+
+ /* Assert parameters correctness */
+ INVALID_VALUE_IF (devices == NULL);
+ INVALID_VALUE_IF (num_devices == 0);
+ INVALID_VALUE_IF (pfn_notify == NULL && user_data != NULL);
+
+ /* XXX */
+ FATAL_IF (pfn_notify != NULL || user_data != NULL, "Unsupported call back");
+ FATAL_IF (num_devices != 1, "Only one device is supported");
+
+ /* Check that we are getting the right platform */
+ if (UNLIKELY((err = cl_context_properties_is_ok(properties)) != CL_SUCCESS))
+ goto error;
+ /* platform = intel_platform; */
+
+ /* Now check if the user is asking for the right device */
+ if (UNLIKELY(cl_device_id_is_ok(*devices) == CL_FALSE)) {
+ err = CL_INVALID_DEVICE;
+ goto error;
+ }
+
+ /* We are good */
+ if (UNLIKELY((ctx = cl_context_new()) == NULL)) {
+ err = CL_OUT_OF_HOST_MEMORY;
+ goto error;
+ }
+
+ /* Attach the device to the context */
+ ctx->device = *devices;
+
+exit:
+ if (errcode_ret != NULL)
+ *errcode_ret = err;
+ return ctx;
+error:
+ cl_context_delete(ctx);
+ ctx = NULL;
+ goto exit;
+}
+
+LOCAL cl_context
+cl_context_new(void)
+{
+ cl_context ctx = NULL;
+
+ TRY_ALLOC_NO_ERR (ctx, CALLOC(struct _cl_context));
+ TRY_ALLOC_NO_ERR (ctx->drv, cl_driver_new());
+ ctx->magic = CL_MAGIC_CONTEXT_HEADER;
+ ctx->ref_n = 1;
+ ctx->ver = cl_driver_get_ver(ctx->drv);
+ pthread_mutex_init(&ctx->program_lock, NULL);
+ pthread_mutex_init(&ctx->queue_lock, NULL);
+ pthread_mutex_init(&ctx->buffer_lock, NULL);
+ pthread_mutex_init(&ctx->sampler_lock, NULL);
+
+exit:
+ return ctx;
+error:
+ cl_context_delete(ctx);
+ ctx = NULL;
+ goto exit;
+}
+
+LOCAL void
+cl_context_delete(cl_context ctx)
+{
+ if (UNLIKELY(ctx == NULL))
+ return;
+
+ /* We are not done yet */
+ if (atomic_dec(&ctx->ref_n) > 1)
+ return;
+
+ /* All object lists should have been freed. Otherwise, the reference counter
+ * of the context cannot be 0
+ */
+ assert(ctx->queues == NULL);
+ assert(ctx->programs == NULL);
+ assert(ctx->buffers == NULL);
+ assert(ctx->drv);
+ cl_driver_delete(ctx->drv);
+ ctx->magic = CL_MAGIC_DEAD_HEADER; /* For safety */
+ cl_free(ctx);
+}
+
+LOCAL void
+cl_context_add_ref(cl_context ctx)
+{
+ assert(ctx);
+ atomic_inc(&ctx->ref_n);
+}
+
+LOCAL cl_command_queue
+cl_context_create_queue(cl_context ctx,
+ cl_device_id device,
+ cl_command_queue_properties properties, /* XXX */
+ cl_int *errcode_ret)
+{
+ cl_command_queue queue = NULL;
+ cl_int err = CL_SUCCESS;
+
+ if (UNLIKELY(device != ctx->device)) {
+ err = CL_INVALID_DEVICE;
+ goto error;
+ }
+
+ /* We create the command queue and store it in the context list of queues */
+ TRY_ALLOC (queue, cl_command_queue_new(ctx));
+
+exit:
+ if (errcode_ret)
+ *errcode_ret = err;
+ return queue;
+error:
+ cl_command_queue_delete(queue);
+ goto exit;
+}
+
+cl_buffer_mgr
+cl_context_get_bufmgr(cl_context ctx)
+{
+ return cl_driver_get_bufmgr(ctx->drv);
+}
+
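cl_context_properties_is_ok() walks the properties array as (key, value) pairs terminated by a single 0, and currently accepts only one CL_CONTEXT_PLATFORM pair pointing at intel_platform. A sketch of the array a caller is expected to pass (the platform handle comes from clGetPlatformIDs):

    #include "CL/cl.h"

    /* Sketch: the only property layout cl_context_properties_is_ok() accepts
     * today -- one CL_CONTEXT_PLATFORM/value pair followed by the 0 terminator. */
    static cl_context create_context_with_platform(cl_platform_id platform,
                                                   cl_device_id device,
                                                   cl_int *err)
    {
      cl_context_properties props[] = {
        CL_CONTEXT_PLATFORM, (cl_context_properties) platform,
        0
      };
      return clCreateContext(props, 1, &device, NULL, NULL, err);
    }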
diff --git a/src/cl_context.h b/src/cl_context.h
new file mode 100644
index 0000000..56fd01c
--- /dev/null
+++ b/src/cl_context.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __CL_CONTEXT_H__
+#define __CL_CONTEXT_H__
+
+#include "cl_internals.h"
+#include "cl_driver.h"
+#include "CL/cl.h"
+
+#include <stdint.h>
+#include <pthread.h>
+
+/* DRI device created at create context */
+struct intel_driver;
+
+/* Encapsulate the whole device */
+struct _cl_context {
+ uint64_t magic; /* To identify it as a context */
+ volatile int ref_n; /* We reference count this object */
+ cl_driver drv; /* Handles HW or simulator */
+ cl_device_id device; /* All information about the GPU device */
+ cl_command_queue queues; /* All command queues currently allocated */
+ cl_program programs; /* All programs currently allocated */
+ cl_mem buffers; /* All memory object currently allocated */
+ cl_sampler samplers; /* All sampler object currently allocated */
+ pthread_mutex_t queue_lock; /* To allocate and deallocate queues */
+ pthread_mutex_t program_lock; /* To allocate and deallocate programs */
+ pthread_mutex_t buffer_lock; /* To allocate and deallocate buffers */
+ pthread_mutex_t sampler_lock; /* To allocate and deallocate samplers */
+ uint32_t ver; /* Gen version */
+};
+
+/* Implements the clCreateContext OpenCL API entry point */
+extern cl_context cl_create_context(const cl_context_properties*,
+ cl_uint,
+ const cl_device_id*,
+ void (CL_CALLBACK * pfn_notify) (const char*, const void*, size_t, void*),
+ void *,
+ cl_int*);
+
+/* Allocate and initialize a context */
+extern cl_context cl_context_new(void);
+
+/* Destroy and deallocate a context */
+extern void cl_context_delete(cl_context);
+
+/* Increment the context reference counter */
+extern void cl_context_add_ref(cl_context);
+
+/* Create the command queue from the given context and device */
+extern cl_command_queue cl_context_create_queue(cl_context,
+ cl_device_id,
+ cl_command_queue_properties,
+ cl_int*);
+
+/* Enqueue an ND range kernel */
+extern cl_int cl_context_ND_kernel(cl_context,
+ cl_command_queue,
+ cl_kernel,
+ cl_uint,
+ const size_t*,
+ const size_t*,
+ const size_t*);
+
+/* Used for allocation */
+extern cl_buffer_mgr cl_context_get_bufmgr(cl_context ctx);
+
+#endif /* __CL_CONTEXT_H__ */
+
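As a usage sketch (not from the upstream sources), this is how the errcode_ret convention of cl_context_create_queue is meant to be consumed; the helper name is illustrative:

#include "cl_context.h"

static cl_command_queue example_make_queue(cl_context ctx)
{
  cl_int err = CL_SUCCESS;
  cl_command_queue queue = cl_context_create_queue(ctx, ctx->device, 0, &err);
  return (err == CL_SUCCESS) ? queue : NULL;  /* queue is already NULL on error */
}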
diff --git a/src/cl_device_data.h b/src/cl_device_data.h
new file mode 100644
index 0000000..e794739
--- /dev/null
+++ b/src/cl_device_data.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __CL_DEVICE_DATA_H__
+#define __CL_DEVICE_DATA_H__
+
+#define PCI_CHIP_GM45_GM 0x2A42
+#define PCI_CHIP_IGD_E_G 0x2E02
+#define PCI_CHIP_Q45_G 0x2E12
+#define PCI_CHIP_G45_G 0x2E22
+#define PCI_CHIP_G41_G 0x2E32
+
+#define PCI_CHIP_IGDNG_D_G 0x0042
+#define PCI_CHIP_IGDNG_M_G 0x0046
+
+#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
+ devid == PCI_CHIP_Q45_G || \
+ devid == PCI_CHIP_G45_G || \
+ devid == PCI_CHIP_G41_G)
+#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM)
+#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid))
+
+#define IS_IGDNG_D(devid) (devid == PCI_CHIP_IGDNG_D_G)
+#define IS_IGDNG_M(devid) (devid == PCI_CHIP_IGDNG_M_G)
+#define IS_IGDNG(devid) (IS_IGDNG_D(devid) || IS_IGDNG_M(devid))
+
+#ifndef PCI_CHIP_SANDYBRIDGE_BRIDGE
+#define PCI_CHIP_SANDYBRIDGE_BRIDGE 0x0100 /* Desktop */
+#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102
+#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112
+#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122
+#define PCI_CHIP_SANDYBRIDGE_BRIDGE_M 0x0104 /* Mobile */
+#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106
+#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116
+#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126
+#define PCI_CHIP_SANDYBRIDGE_BRIDGE_S 0x0108 /* Server */
+#define PCI_CHIP_SANDYBRIDGE_S_GT 0x010A
+#endif
+
+#define IS_GEN6(devid) \
+ (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
+ devid == PCI_CHIP_SANDYBRIDGE_GT2 || \
+ devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \
+ devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \
+ devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \
+ devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \
+ devid == PCI_CHIP_SANDYBRIDGE_S_GT)
+
+#define PCI_CHIP_IVYBRIDGE_GT1 0x0152 /* Desktop */
+#define PCI_CHIP_IVYBRIDGE_GT2 0x0162
+#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 /* Mobile */
+#define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166
+#define PCI_CHIP_IVYBRIDGE_S_GT1 0x015a /* Server */
+
+#define IS_IVB_GT1(devid) \
+ (devid == PCI_CHIP_IVYBRIDGE_GT1 || \
+ devid == PCI_CHIP_IVYBRIDGE_M_GT1 || \
+ devid == PCI_CHIP_IVYBRIDGE_S_GT1)
+
+#define IS_IVB_GT2(devid) \
+ (devid == PCI_CHIP_IVYBRIDGE_GT2 || \
+ devid == PCI_CHIP_IVYBRIDGE_M_GT2)
+
+#define IS_IVYBRIDGE(devid) (IS_IVB_GT1(devid) || IS_IVB_GT2(devid))
+#define IS_GEN7(devid) IS_IVYBRIDGE(devid)
+
+#define PCI_CHIP_HASWELL_M0 0x0094
+#define PCI_CHIP_HASWELL_D0 0x0090
+#define PCI_CHIP_HASWELL_M 0x0091
+#define PCI_CHIP_HASWELL_L 0x0092
+
+#define IS_HASWELL(devid) ((devid) == PCI_CHIP_HASWELL_M0 || \
+ (devid) == PCI_CHIP_HASWELL_D0 || \
+ (devid) == PCI_CHIP_HASWELL_M || \
+ (devid) == PCI_CHIP_HASWELL_L)
+#define IS_GEN75(devid) IS_HASWELL(devid)
+
+#endif /* __CL_DEVICE_DATA_H__ */
+
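A small, self-contained check (not part of the upstream file) showing how the PCI-id macros above classify a device; the helper name is illustrative and the device id is just one of the constants defined above:

#include <assert.h>
#include "cl_device_data.h"

static void example_classify_devid(void)
{
  const int devid = PCI_CHIP_IVYBRIDGE_GT2;          /* 0x0162, desktop IVB GT2 */
  assert(IS_IVB_GT2(devid) && IS_IVYBRIDGE(devid));
  assert(IS_GEN7(devid) && !IS_GEN75(devid));        /* Gen7, not Haswell */
}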
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
new file mode 100644
index 0000000..4fc7939
--- /dev/null
+++ b/src/cl_device_id.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "cl_platform_id.h"
+#include "cl_device_id.h"
+#include "cl_internals.h"
+#include "cl_utils.h"
+#include "cl_driver.h"
+#include "cl_device_data.h"
+#include "CL/cl.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+static struct _cl_device_id intel_ivb_gt2_device = {
+ .max_compute_unit = 128,
+ .max_thread_per_unit = 8,
+ .max_work_item_sizes = {512, 512, 512},
+ .max_work_group_size = 512,
+ .max_clock_frequency = 1000,
+ .wg_sz = 1024,
+ .compile_wg_sz = {0},
+#include "cl_gen7_device.h"
+};
+
+static struct _cl_device_id intel_ivb_gt1_device = {
+ .max_compute_unit = 64,
+ .max_thread_per_unit = 8,
+ .max_work_item_sizes = {512, 512, 512},
+ .max_work_group_size = 512,
+ .max_clock_frequency = 1000,
+ .wg_sz = 512,
+ .compile_wg_sz = {0},
+#include "cl_gen7_device.h"
+};
+
+/* XXX we clone IVB for HSW now */
+static struct _cl_device_id intel_hsw_device = {
+ .max_compute_unit = 64,
+ .max_thread_per_unit = 8,
+ .max_work_item_sizes = {512, 512, 512},
+ .max_work_group_size = 512,
+ .max_clock_frequency = 1000,
+ .wg_sz = 512,
+ .compile_wg_sz = {0},
+#include "cl_gen75_device.h"
+};
+
+LOCAL cl_device_id
+cl_get_gt_device(void)
+{
+ cl_device_id ret = NULL;
+ const int device_id = cl_driver_get_device_id();
+
+ /* XXX we pick IVB for HSW now */
+ if (device_id == PCI_CHIP_HASWELL_M ||
+ device_id == PCI_CHIP_HASWELL_L ||
+ device_id == PCI_CHIP_HASWELL_M0 ||
+ device_id == PCI_CHIP_HASWELL_D0) {
+ intel_hsw_device.vendor_id = device_id;
+ intel_hsw_device.platform = intel_platform;
+ ret = &intel_hsw_device;
+ }
+ else if (device_id == PCI_CHIP_IVYBRIDGE_GT1 ||
+ device_id == PCI_CHIP_IVYBRIDGE_M_GT1 ||
+ device_id == PCI_CHIP_IVYBRIDGE_S_GT1) {
+ intel_ivb_gt1_device.vendor_id = device_id;
+ intel_ivb_gt1_device.platform = intel_platform;
+ ret = &intel_ivb_gt1_device;
+ }
+ else if (device_id == PCI_CHIP_IVYBRIDGE_GT2 ||
+ device_id == PCI_CHIP_IVYBRIDGE_M_GT2) {
+ intel_ivb_gt2_device.vendor_id = device_id;
+ intel_ivb_gt2_device.platform = intel_platform;
+ ret = &intel_ivb_gt2_device;
+ }
+ return ret;
+}
+
+LOCAL cl_int
+cl_get_device_ids(cl_platform_id platform,
+ cl_device_type device_type,
+ cl_uint num_entries,
+ cl_device_id * devices,
+ cl_uint * num_devices)
+{
+ /* Check parameter consistency */
+ if (UNLIKELY(num_entries == 0 && devices == NULL && num_devices == NULL))
+ return CL_SUCCESS;
+ if (UNLIKELY(devices == NULL && num_devices == NULL))
+ return CL_INVALID_VALUE;
+ if (UNLIKELY(platform != NULL && platform != intel_platform))
+ return CL_INVALID_PLATFORM;
+ if (num_devices && (device_type == CL_DEVICE_TYPE_CPU)) {
+ *num_devices = 0;
+ return CL_SUCCESS;
+ }
+
+ /* Detect our device (reject a non-Intel one or gen < 6) */
+ if (UNLIKELY(cl_get_gt_device() == NULL)) {
+ if (num_devices)
+ *num_devices = 0; /* no supported GPU was found */
+ return CL_DEVICE_NOT_FOUND;
+ }
+ if (devices)
+ *devices = cl_get_gt_device();
+ if (num_devices)
+ *num_devices = 1;
+ return CL_SUCCESS;
+}
+
+#define DECL_FIELD(CASE,FIELD) \
+ case JOIN(CL_DEVICE_,CASE): \
+ if (param_value_size < sizeof(((cl_device_id)NULL)->FIELD)) \
+ return CL_INVALID_VALUE; \
+ if (param_value_size_ret != NULL) \
+ *param_value_size_ret = sizeof(((cl_device_id)NULL)->FIELD);\
+ memcpy(param_value, \
+ &device->FIELD, \
+ sizeof(((cl_device_id)NULL)->FIELD)); \
+ return CL_SUCCESS;
+
+#define DECL_STRING_FIELD(CASE,FIELD) \
+ case JOIN(CL_DEVICE_,CASE): \
+ if (param_value_size < device->JOIN(FIELD,_sz)) \
+ return CL_INVALID_VALUE; \
+ if (param_value_size_ret != NULL) \
+ *param_value_size_ret = device->JOIN(FIELD,_sz); \
+ memcpy(param_value, device->FIELD, device->JOIN(FIELD,_sz)); \
+ return CL_SUCCESS;
+
+LOCAL cl_int
+cl_get_device_info(cl_device_id device,
+ cl_device_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret)
+{
+ if (UNLIKELY(device != &intel_ivb_gt1_device &&
+ device != &intel_ivb_gt2_device &&
+ device != &intel_hsw_device))
+ return CL_INVALID_DEVICE;
+ if (UNLIKELY(param_value == NULL))
+ return CL_INVALID_VALUE;
+
+ /* Find the correct parameter */
+ switch (param_name) {
+ DECL_FIELD(TYPE, device_type)
+ DECL_FIELD(VENDOR_ID, vendor_id)
+ DECL_FIELD(MAX_COMPUTE_UNITS, max_compute_unit)
+ DECL_FIELD(MAX_WORK_ITEM_DIMENSIONS, max_work_item_dimensions)
+ DECL_FIELD(MAX_WORK_ITEM_SIZES, max_work_item_sizes)
+ DECL_FIELD(MAX_WORK_GROUP_SIZE, max_work_group_size)
+ DECL_FIELD(PREFERRED_VECTOR_WIDTH_CHAR, preferred_vector_width_char)
+ DECL_FIELD(PREFERRED_VECTOR_WIDTH_SHORT, preferred_vector_width_short)
+ DECL_FIELD(PREFERRED_VECTOR_WIDTH_INT, preferred_vector_width_int)
+ DECL_FIELD(PREFERRED_VECTOR_WIDTH_LONG, preferred_vector_width_long)
+ DECL_FIELD(PREFERRED_VECTOR_WIDTH_FLOAT, preferred_vector_width_float)
+ DECL_FIELD(PREFERRED_VECTOR_WIDTH_DOUBLE, preferred_vector_width_double)
+ DECL_FIELD(PREFERRED_VECTOR_WIDTH_HALF, preferred_vector_width_half)
+ DECL_FIELD(NATIVE_VECTOR_WIDTH_CHAR, native_vector_width_char)
+ DECL_FIELD(NATIVE_VECTOR_WIDTH_SHORT, native_vector_width_short)
+ DECL_FIELD(NATIVE_VECTOR_WIDTH_INT, native_vector_width_int)
+ DECL_FIELD(NATIVE_VECTOR_WIDTH_LONG, native_vector_width_long)
+ DECL_FIELD(NATIVE_VECTOR_WIDTH_FLOAT, native_vector_width_float)
+ DECL_FIELD(NATIVE_VECTOR_WIDTH_DOUBLE, native_vector_width_double)
+ DECL_FIELD(NATIVE_VECTOR_WIDTH_HALF, native_vector_width_half)
+ DECL_FIELD(MAX_CLOCK_FREQUENCY, max_clock_frequency)
+ DECL_FIELD(ADDRESS_BITS, address_bits)
+ DECL_FIELD(MAX_MEM_ALLOC_SIZE, max_mem_alloc_size)
+ DECL_FIELD(IMAGE_SUPPORT, image_support)
+ DECL_FIELD(MAX_READ_IMAGE_ARGS, max_read_image_args)
+ DECL_FIELD(MAX_WRITE_IMAGE_ARGS, max_write_image_args)
+ DECL_FIELD(IMAGE2D_MAX_WIDTH, image2d_max_width)
+ DECL_FIELD(IMAGE2D_MAX_HEIGHT, image2d_max_height)
+ DECL_FIELD(IMAGE3D_MAX_WIDTH, image3d_max_width)
+ DECL_FIELD(IMAGE3D_MAX_HEIGHT, image3d_max_height)
+ DECL_FIELD(IMAGE3D_MAX_DEPTH, image3d_max_depth)
+ DECL_FIELD(MAX_SAMPLERS, max_samplers)
+ DECL_FIELD(MAX_PARAMETER_SIZE, max_parameter_size)
+ DECL_FIELD(MEM_BASE_ADDR_ALIGN, mem_base_addr_align)
+ DECL_FIELD(MIN_DATA_TYPE_ALIGN_SIZE, min_data_type_align_size)
+ DECL_FIELD(SINGLE_FP_CONFIG, single_fp_config)
+ DECL_FIELD(GLOBAL_MEM_CACHE_TYPE, global_mem_cache_type)
+ DECL_FIELD(GLOBAL_MEM_CACHELINE_SIZE, global_mem_cache_line_size)
+ DECL_FIELD(GLOBAL_MEM_CACHE_SIZE, global_mem_cache_size)
+ DECL_FIELD(GLOBAL_MEM_SIZE, global_mem_size)
+ DECL_FIELD(MAX_CONSTANT_BUFFER_SIZE, max_constant_buffer_size)
+ DECL_FIELD(MAX_CONSTANT_ARGS, max_constant_args)
+ DECL_FIELD(LOCAL_MEM_TYPE, local_mem_type)
+ DECL_FIELD(LOCAL_MEM_SIZE, local_mem_size)
+ DECL_FIELD(ERROR_CORRECTION_SUPPORT, error_correction_support)
+ DECL_FIELD(HOST_UNIFIED_MEMORY, host_unified_memory)
+ DECL_FIELD(PROFILING_TIMER_RESOLUTION, profiling_timer_resolution)
+ DECL_FIELD(ENDIAN_LITTLE, endian_little)
+ DECL_FIELD(AVAILABLE, available)
+ DECL_FIELD(COMPILER_AVAILABLE, compiler_available)
+ DECL_FIELD(EXECUTION_CAPABILITIES, execution_capabilities)
+ DECL_FIELD(QUEUE_PROPERTIES, queue_properties)
+ DECL_FIELD(PLATFORM, platform)
+ DECL_STRING_FIELD(NAME, name)
+ DECL_STRING_FIELD(VENDOR, vendor)
+ DECL_STRING_FIELD(VERSION, version)
+ DECL_STRING_FIELD(PROFILE, profile)
+ DECL_STRING_FIELD(OPENCL_C_VERSION, opencl_c_version)
+ default: return CL_INVALID_VALUE;
+ };
+}
+
+LOCAL cl_int
+cl_device_get_version(cl_device_id device, cl_int *ver)
+{
+ if (UNLIKELY(device != &intel_ivb_gt1_device &&
+ device != &intel_ivb_gt2_device &&
+ device != &intel_hsw_device))
+ return CL_INVALID_DEVICE;
+ if (ver == NULL)
+ return CL_SUCCESS;
+ if (device == &intel_ivb_gt1_device || device == &intel_ivb_gt2_device)
+ *ver = 7;
+ else
+ *ver = 75;
+ return CL_SUCCESS;
+}
+#undef DECL_FIELD
+
+#define DECL_FIELD(CASE,FIELD) \
+ case JOIN(CL_KERNEL_,CASE): \
+ if (param_value_size < sizeof(((cl_device_id)NULL)->FIELD)) \
+ return CL_INVALID_VALUE; \
+ if (param_value_size_ret != NULL) \
+ *param_value_size_ret = sizeof(((cl_device_id)NULL)->FIELD);\
+ memcpy(param_value, \
+ &device->FIELD, \
+ sizeof(((cl_device_id)NULL)->FIELD)); \
+ return CL_SUCCESS;
+
+LOCAL cl_int
+cl_get_kernel_workgroup_info(cl_device_id device,
+ cl_kernel_work_group_info param_name,
+ size_t param_value_size,
+ void* param_value,
+ size_t* param_value_size_ret)
+{
+ if (UNLIKELY(device != &intel_ivb_gt1_device &&
+ device != &intel_ivb_gt2_device))
+ return CL_INVALID_DEVICE;
+ if (UNLIKELY(param_value == NULL))
+ return CL_INVALID_VALUE;
+
+ switch (param_name) {
+ DECL_FIELD(WORK_GROUP_SIZE, wg_sz)
+ DECL_FIELD(COMPILE_WORK_GROUP_SIZE, compile_wg_sz)
+ default: return CL_INVALID_VALUE;
+ };
+}
+
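Each DECL_FIELD case above bounds-checks param_value_size against the field size, optionally reports that size, and copies the field out, so this implementation requires a non-NULL param_value. A usage sketch, not from the upstream sources, with an illustrative helper name:

#include "CL/cl.h"
#include "cl_device_id.h"

static cl_uint example_query_compute_units(cl_device_id dev)
{
  cl_uint n = 0;
  size_t sz = 0;
  const cl_int err = cl_get_device_info(dev, CL_DEVICE_MAX_COMPUTE_UNITS,
                                        sizeof(n), &n, &sz);
  return (err == CL_SUCCESS) ? n : 0;  /* e.g. 128 with the IVB GT2 table above */
}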
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
new file mode 100644
index 0000000..b7ba6b3
--- /dev/null
+++ b/src/cl_device_id.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __CL_DEVICE_ID_H__
+#define __CL_DEVICE_ID_H__
+
+/* Store complete information about the device */
+struct _cl_device_id {
+ cl_device_type device_type;
+ cl_uint vendor_id;
+ cl_uint max_compute_unit;
+ cl_uint max_thread_per_unit;
+ cl_uint max_work_item_dimensions;
+ size_t max_work_item_sizes[3];
+ cl_uint max_work_group_size;
+ cl_uint preferred_vector_width_char;
+ cl_uint preferred_vector_width_short;
+ cl_uint preferred_vector_width_int;
+ cl_uint preferred_vector_width_long;
+ cl_uint preferred_vector_width_float;
+ cl_uint preferred_vector_width_double;
+ cl_uint preferred_vector_width_half;
+ cl_uint native_vector_width_char;
+ cl_uint native_vector_width_short;
+ cl_uint native_vector_width_int;
+ cl_uint native_vector_width_long;
+ cl_uint native_vector_width_float;
+ cl_uint native_vector_width_double;
+ cl_uint native_vector_width_half;
+ cl_uint max_clock_frequency;
+ cl_uint address_bits;
+ cl_ulong max_mem_alloc_size;
+ cl_bool image_support;
+ cl_uint max_read_image_args;
+ cl_uint max_write_image_args;
+ size_t image2d_max_width;
+ size_t image2d_max_height;
+ size_t image3d_max_width;
+ size_t image3d_max_height;
+ size_t image3d_max_depth;
+ cl_uint max_samplers;
+ cl_uint max_parameter_size;
+ cl_uint mem_base_addr_align;
+ cl_uint min_data_type_align_size;
+ cl_device_fp_config single_fp_config;
+ cl_device_mem_cache_type global_mem_cache_type;
+ cl_uint global_mem_cache_line_size;
+ cl_ulong global_mem_cache_size;
+ cl_ulong global_mem_size;
+ cl_ulong max_constant_buffer_size;
+ cl_uint max_constant_args;
+ cl_device_local_mem_type local_mem_type;
+ cl_ulong local_mem_size;
+ cl_bool error_correction_support;
+ cl_bool host_unified_memory;
+ size_t profiling_timer_resolution;
+ cl_bool endian_little;
+ cl_bool available;
+ cl_bool compiler_available;
+ cl_device_exec_capabilities execution_capabilities;
+ cl_command_queue_properties queue_properties;
+ cl_platform_id platform;
+ const char *name;
+ const char *vendor;
+ const char *version;
+ const char *profile;
+ const char *opencl_c_version;
+ const char *extensions;
+ size_t name_sz;
+ size_t vendor_sz;
+ size_t version_sz;
+ size_t profile_sz;
+ size_t opencl_c_version_sz;
+ size_t extensions_sz;
+ /* Kernel specific info that we're assigning statically */
+ size_t wg_sz;
+ size_t compile_wg_sz[3];
+};
+
+/* Get a device from the given platform */
+extern cl_int cl_get_device_ids(cl_platform_id platform,
+ cl_device_type device_type,
+ cl_uint num_entries,
+ cl_device_id * devices,
+ cl_uint * num_devices);
+
+/* Get the Intel GPU device currently present in this machine (if any) */
+extern cl_device_id cl_get_gt_device(void);
+
+/* Provide info about the device */
+extern cl_int cl_get_device_info(cl_device_id device,
+ cl_device_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret);
+
+extern cl_int cl_get_kernel_workgroup_info(cl_device_id device,
+ cl_kernel_work_group_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret);
+/* Returns the Gen version of the device (7 for IVB, 75 for HSW) */
+extern cl_int cl_device_get_version(cl_device_id device, cl_int *ver);
+
+#endif /* __CL_DEVICE_ID_H__ */
+
diff --git a/src/cl_driver.cpp b/src/cl_driver.cpp
new file mode 100644
index 0000000..19ac4ae
--- /dev/null
+++ b/src/cl_driver.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+extern "C" {
+#include "intel/intel_driver.h"
+#include "cl_utils.h"
+#include <stdlib.h>
+#include <string.h>
+}
+
+namespace
+{
+ /*! Use C++ pre-main static initialization to set up the call-backs */
+ struct OCLDriverCallBackInitializer
+ {
+ OCLDriverCallBackInitializer(void) {
+ intel_setup_callbacks();
+ }
+ };
+
+ /*! Set the call backs at pre-main time */
+ static OCLDriverCallBackInitializer cbInitializer;
+} /* namespace */
+
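The pre-main trick above relies on intel_setup_callbacks() assigning the function pointers declared in cl_driver.h; its real body lives in the intel/ directory elsewhere in this import. The sketch below only illustrates the wiring pattern, and every name in it is an assumption, not upstream code:

#include "cl_driver.h"

/* Hypothetical backend functions, named for illustration only */
extern cl_driver hypothetical_hw_driver_new(void);
extern void hypothetical_hw_driver_delete(cl_driver);

static void example_setup_driver_hooks(void)
{
  cl_driver_new = hypothetical_hw_driver_new;
  cl_driver_delete = hypothetical_hw_driver_delete;
  /* ... the same pattern applies to the buffer and gpgpu call backs ... */
}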
diff --git a/src/cl_driver.h b/src/cl_driver.h
new file mode 100644
index 0000000..76e5268
--- /dev/null
+++ b/src/cl_driver.h
@@ -0,0 +1,226 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __CL_DRIVER_H__
+#define __CL_DRIVER_H__
+
+#include <stdint.h>
+#include <stdlib.h>
+
+/* Various limitations that we should eventually remove */
+#define GEN_MAX_SURFACES 128
+#define GEN_MAX_SAMPLERS 16
+
+/**************************************************************************
+ * cl_driver:
+ * Hide buffer allocation / deallocation and the other HW services behind
+ * call backs. This makes it easier to plug in a software performance
+ * simulator and keeps the HW-specific and simulator-specific code minimal
+ **************************************************************************/
+
+/* Encapsulates command buffer / data buffer / kernels */
+typedef struct _cl_buffer *cl_buffer;
+
+/* Encapsulates buffer manager */
+typedef struct _cl_buffer_mgr *cl_buffer_mgr;
+
+/* Encapsulates the driver backend functionalities */
+typedef struct _cl_driver *cl_driver;
+
+/* Encapsulates the gpgpu stream of commands */
+typedef struct _cl_gpgpu *cl_gpgpu;
+
+/**************************************************************************
+ * Driver
+ **************************************************************************/
+/* Create a new driver */
+typedef cl_driver (cl_driver_new_cb)(void);
+extern cl_driver_new_cb *cl_driver_new;
+
+/* Delete the driver */
+typedef void (cl_driver_delete_cb)(cl_driver);
+extern cl_driver_delete_cb *cl_driver_delete;
+
+/* Get the buffer manager from the driver */
+typedef cl_buffer_mgr (cl_driver_get_bufmgr_cb)(cl_driver);
+extern cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr;
+
+/* Get the Gen version from the driver */
+typedef uint32_t (cl_driver_get_ver_cb)(cl_driver);
+extern cl_driver_get_ver_cb *cl_driver_get_ver;
+
+/**************************************************************************
+ * GPGPU command streamer
+ **************************************************************************/
+/* Describe texture tiling */
+typedef enum cl_gpgpu_tiling {
+ GPGPU_NO_TILE = 0,
+ GPGPU_TILE_X = 1,
+ GPGPU_TILE_Y = 2,
+} cl_gpgpu_tiling;
+
+/* Cache control options */
+typedef enum cl_cache_control {
+ cc_gtt = 0x0,
+ cc_l3 = 0x1,
+ cc_llc = 0x2,
+ cc_llc_l3 = 0x3
+} cl_cache_control;
+
+/* Use this structure to bind kernels in the gpgpu state */
+typedef struct cl_gpgpu_kernel {
+ const char *name; /* kernel name and bo name */
+ uint32_t grf_blocks; /* register blocks kernel wants (in 8 reg blocks) */
+ uint32_t cst_sz; /* total size of all constants */
+ cl_buffer bo; /* kernel code in the proper addr space */
+ int32_t barrierID; /* barrierID for _this_ kernel */
+ uint32_t use_slm:1; /* For gen7 (automatic barrier management) */
+ uint32_t thread_n:15; /* For gen7 (automatic barrier management) */
+ uint32_t slm_sz:16; /* For gen7 (automatic SLM allocation) */
+} cl_gpgpu_kernel;
+
+/* Create a new gpgpu state */
+typedef cl_gpgpu (cl_gpgpu_new_cb)(cl_driver);
+extern cl_gpgpu_new_cb *cl_gpgpu_new;
+
+/* Delete the gpgpu state */
+typedef void (cl_gpgpu_delete_cb)(cl_gpgpu);
+extern cl_gpgpu_delete_cb *cl_gpgpu_delete;
+
+/* Bind a regular unformatted buffer */
+typedef void (cl_gpgpu_bind_buf_cb)(cl_gpgpu, cl_buffer, uint32_t offset, uint32_t cchint);
+extern cl_gpgpu_bind_buf_cb *cl_gpgpu_bind_buf;
+
+/* Set a 2d texture */
+typedef void (cl_gpgpu_bind_image2D_cb)(cl_gpgpu state,
+ int32_t index,
+ cl_buffer obj_bo,
+ uint32_t format,
+ int32_t w,
+ int32_t h,
+ int pitch,
+ cl_gpgpu_tiling tiling);
+extern cl_gpgpu_bind_image2D_cb *cl_gpgpu_bind_image2D;
+
+/* Setup a stack */
+typedef void (cl_gpgpu_set_stack_cb)(cl_gpgpu, uint32_t offset, uint32_t size, uint32_t cchint);
+extern cl_gpgpu_set_stack_cb *cl_gpgpu_set_stack;
+
+/* Configure internal state */
+typedef void (cl_gpgpu_state_init_cb)(cl_gpgpu, uint32_t max_threads, uint32_t size_cs_entry);
+extern cl_gpgpu_state_init_cb *cl_gpgpu_state_init;
+
+/* Set the buffer object where to report performance counters */
+typedef void (cl_gpgpu_set_perf_counters_cb)(cl_gpgpu, cl_buffer perf);
+extern cl_gpgpu_set_perf_counters_cb *cl_gpgpu_set_perf_counters;
+
+/* Fills current constant buffer with data */
+typedef void (cl_gpgpu_upload_constants_cb)(cl_gpgpu, const void* data, uint32_t size);
+extern cl_gpgpu_upload_constants_cb *cl_gpgpu_upload_constants;
+
+/* Setup all indirect states */
+typedef void (cl_gpgpu_states_setup_cb)(cl_gpgpu, cl_gpgpu_kernel *kernel);
+extern cl_gpgpu_states_setup_cb *cl_gpgpu_states_setup;
+
+/* Upload the constant samplers as specified inside the OCL kernel */
+typedef void (cl_gpgpu_upload_samplers_cb)(cl_gpgpu *state, const void *data, uint32_t n);
+extern cl_gpgpu_upload_samplers_cb *cl_gpgpu_upload_samplers;
+
+/* Set a sampler */
+typedef void (cl_gpgpu_set_sampler_cb)(cl_gpgpu, uint32_t index, uint32_t non_normalized);
+extern cl_gpgpu_set_sampler_cb *cl_gpgpu_set_sampler;
+
+/* Allocate the batch buffer and return the BO used for the batch buffer */
+typedef void (cl_gpgpu_batch_reset_cb)(cl_gpgpu, size_t sz);
+extern cl_gpgpu_batch_reset_cb *cl_gpgpu_batch_reset;
+
+/* Atomic begin, pipeline select, urb, pipeline state and constant buffer */
+typedef void (cl_gpgpu_batch_start_cb)(cl_gpgpu);
+extern cl_gpgpu_batch_start_cb *cl_gpgpu_batch_start;
+
+/* Atomic end with a possibly inserted flush */
+typedef void (cl_gpgpu_batch_end_cb)(cl_gpgpu, int32_t flush_mode);
+extern cl_gpgpu_batch_end_cb *cl_gpgpu_batch_end;
+
+/* Flush the command buffer */
+typedef void (cl_gpgpu_flush_cb)(cl_gpgpu);
+extern cl_gpgpu_flush_cb *cl_gpgpu_flush;
+
+/* Will spawn all threads */
+typedef void (cl_gpgpu_walker_cb)(cl_gpgpu,
+ uint32_t simd_sz,
+ uint32_t thread_n,
+ const size_t global_wk_off[3],
+ const size_t global_wk_sz[3],
+ const size_t local_wk_sz[3]);
+extern cl_gpgpu_walker_cb *cl_gpgpu_walker;
+
+/**************************************************************************
+ * Buffer
+ **************************************************************************/
+/* Allocate a buffer */
+typedef cl_buffer (cl_buffer_alloc_cb)(cl_buffer_mgr, const char*, unsigned long, unsigned long);
+extern cl_buffer_alloc_cb *cl_buffer_alloc;
+
+/* Unref a buffer and destroy it if no more ref */
+typedef void (cl_buffer_unreference_cb)(cl_buffer);
+extern cl_buffer_unreference_cb *cl_buffer_unreference;
+
+/* Add one more ref on a buffer */
+typedef void (cl_buffer_reference_cb)(cl_buffer);
+extern cl_buffer_reference_cb *cl_buffer_reference;
+
+/* Map a buffer */
+typedef int (cl_buffer_map_cb)(cl_buffer, uint32_t write_enable);
+extern cl_buffer_map_cb *cl_buffer_map;
+
+/* Unmap a buffer */
+typedef int (cl_buffer_unmap_cb)(cl_buffer);
+extern cl_buffer_unmap_cb *cl_buffer_unmap;
+
+/* Get the virtual address (when mapped) */
+typedef void* (cl_buffer_get_virtual_cb)(cl_buffer);
+extern cl_buffer_get_virtual_cb *cl_buffer_get_virtual;
+
+/* Get the size of the buffer */
+typedef size_t (cl_buffer_get_size_cb)(cl_buffer);
+extern cl_buffer_get_size_cb *cl_buffer_get_size;
+
+/* Pin a buffer */
+typedef int (cl_buffer_pin_cb)(cl_buffer, uint32_t alignment);
+extern cl_buffer_pin_cb *cl_buffer_pin;
+
+/* Unpin a buffer */
+typedef int (cl_buffer_unpin_cb)(cl_buffer);
+extern cl_buffer_unpin_cb *cl_buffer_unpin;
+
+/* Fill data in the buffer */
+typedef int (cl_buffer_subdata_cb)(cl_buffer, unsigned long, unsigned long, const void*);
+extern cl_buffer_subdata_cb *cl_buffer_subdata;
+
+/* Wait for all pending rendering for this buffer to complete */
+typedef int (cl_buffer_wait_rendering_cb) (cl_buffer);
+extern cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering;
+
+/* Get the device id */
+typedef int (cl_driver_get_device_id_cb)(void);
+extern cl_driver_get_device_id_cb *cl_driver_get_device_id;
+
+#endif /* __CL_DRIVER_H__ */
+
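A minimal call-through sketch (not part of the upstream header): the rest of the runtime only talks to the HW through these pointers, which the pre-main initializer in cl_driver.cpp has already filled in by the time any API call runs; the helper name is illustrative:

#include "cl_driver.h"

static void example_use_driver(void)
{
  cl_driver drv = cl_driver_new();
  if (drv == NULL)
    return;
  const uint32_t ver = cl_driver_get_ver(drv);       /* Gen version, e.g. 7 for IVB */
  cl_buffer_mgr bufmgr = cl_driver_get_bufmgr(drv);  /* used later for allocations */
  (void) ver;
  (void) bufmgr;
  cl_driver_delete(drv);
}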
diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
new file mode 100644
index 0000000..66d805d
--- /dev/null
+++ b/src/cl_driver_defs.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "cl_driver.h"
+#include "cl_utils.h"
+#include <stdlib.h>
+
+/* Driver */
+LOCAL cl_driver_new_cb *cl_driver_new = NULL;
+LOCAL cl_driver_delete_cb *cl_driver_delete = NULL;
+LOCAL cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr = NULL;
+LOCAL cl_driver_get_ver_cb *cl_driver_get_ver = NULL;
+LOCAL cl_driver_get_device_id_cb *cl_driver_get_device_id = NULL;
+
+/* Buffer */
+LOCAL cl_buffer_alloc_cb *cl_buffer_alloc = NULL;
+LOCAL cl_buffer_reference_cb *cl_buffer_reference = NULL;
+LOCAL cl_buffer_unreference_cb *cl_buffer_unreference = NULL;
+LOCAL cl_buffer_map_cb *cl_buffer_map = NULL;
+LOCAL cl_buffer_unmap_cb *cl_buffer_unmap = NULL;
+LOCAL cl_buffer_get_virtual_cb *cl_buffer_get_virtual = NULL;
+LOCAL cl_buffer_get_size_cb *cl_buffer_get_size = NULL;
+LOCAL cl_buffer_pin_cb *cl_buffer_pin = NULL;
+LOCAL cl_buffer_unpin_cb *cl_buffer_unpin = NULL;
+LOCAL cl_buffer_subdata_cb *cl_buffer_subdata = NULL;
+LOCAL cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering = NULL;
+
+/* GPGPU */
+LOCAL cl_gpgpu_new_cb *cl_gpgpu_new = NULL;
+LOCAL cl_gpgpu_delete_cb *cl_gpgpu_delete = NULL;
+LOCAL cl_gpgpu_bind_buf_cb *cl_gpgpu_bind_buf = NULL;
+LOCAL cl_gpgpu_set_stack_cb *cl_gpgpu_set_stack = NULL;
+LOCAL cl_gpgpu_bind_image2D_cb *cl_gpgpu_bind_image2D = NULL;
+LOCAL cl_gpgpu_state_init_cb *cl_gpgpu_state_init = NULL;
+LOCAL cl_gpgpu_set_perf_counters_cb *cl_gpgpu_set_perf_counters = NULL;
+LOCAL cl_gpgpu_upload_constants_cb *cl_gpgpu_upload_constants = NULL;
+LOCAL cl_gpgpu_states_setup_cb *cl_gpgpu_states_setup = NULL;
+LOCAL cl_gpgpu_upload_samplers_cb *cl_gpgpu_upload_samplers = NULL;
+LOCAL cl_gpgpu_batch_reset_cb *cl_gpgpu_batch_reset = NULL;
+LOCAL cl_gpgpu_batch_start_cb *cl_gpgpu_batch_start = NULL;
+LOCAL cl_gpgpu_batch_end_cb *cl_gpgpu_batch_end = NULL;
+LOCAL cl_gpgpu_flush_cb *cl_gpgpu_flush = NULL;
+LOCAL cl_gpgpu_walker_cb *cl_gpgpu_walker = NULL;
+
diff --git a/src/cl_event.c b/src/cl_event.c
new file mode 100644
index 0000000..6539b05
--- /dev/null
+++ b/src/cl_event.c
@@ -0,0 +1,20 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+struct empty {int dummy;};
+
diff --git a/src/cl_event.h b/src/cl_event.h
new file mode 100644
index 0000000..879357c
--- /dev/null
+++ b/src/cl_event.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __CL_EVENT_H__
+#define __CL_EVENT_H__
+
+struct _cl_event {
+ int dummy; /* stub: event support is not implemented yet */
+};
+
+#endif /* __CL_EVENT_H__ */
+
diff --git a/src/cl_gen75_device.h b/src/cl_gen75_device.h
new file mode 100644
index 0000000..e72ab0b
--- /dev/null
+++ b/src/cl_gen75_device.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/* Common fields for Haswell (Gen75) devices (either GT1 or GT2)
+ */
+.max_parameter_size = 256,
+.global_mem_cache_line_size = 128, /* XXX */
+.global_mem_cache_size = 8 << 10, /* XXX */
+.local_mem_type = CL_GLOBAL,
+.local_mem_size = 64 << 10,
+
+#include "cl_gt_device.h"
+
diff --git a/src/cl_gen7_device.h b/src/cl_gen7_device.h
new file mode 100644
index 0000000..5886103
--- /dev/null
+++ b/src/cl_gen7_device.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/* Common fields for both IVB devices (either GT1 or GT2) */
+.max_parameter_size = 256,
+.global_mem_cache_line_size = 128, /* XXX */
+.global_mem_cache_size = 8 << 10, /* XXX */
+.local_mem_type = CL_GLOBAL,
+.local_mem_size = 64 << 10,
+
+#include "cl_gt_device.h"
+
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
new file mode 100644
index 0000000..8717a16
--- /dev/null
+++ b/src/cl_gt_device.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/* Common fields for all GT devices (IVB / SNB) */
+.device_type = CL_DEVICE_TYPE_GPU,
+.vendor_id = 0, /* == device_id (set when requested) */
+.max_work_item_dimensions = 3,
+.preferred_vector_width_char = 16,
+.preferred_vector_width_short = 16,
+.preferred_vector_width_int = 16,
+.preferred_vector_width_long = 16,
+.preferred_vector_width_float = 16,
+.preferred_vector_width_double = 0,
+.preferred_vector_width_half = 0,
+.native_vector_width_char = 16,
+.native_vector_width_short = 16,
+.native_vector_width_int = 16,
+.native_vector_width_long = 16,
+.native_vector_width_float = 16,
+.native_vector_width_double = 16,
+.native_vector_width_half = 16,
+.address_bits = 32,
+.max_mem_alloc_size = 128 * 1024 * 1024,
+.image_support = CL_FALSE,
+.max_read_image_args = 0,
+.max_write_image_args = 0,
+.image2d_max_width = 8192,
+.image2d_max_height = 8192,
+.image3d_max_width = 8192,
+.image3d_max_height = 8192,
+.image3d_max_depth = 8192,
+.max_samplers = 0,
+.mem_base_addr_align = sizeof(cl_uint) * 8,
+.min_data_type_align_size = sizeof(cl_uint),
+.single_fp_config = 0, /* XXX */
+.global_mem_cache_type = CL_READ_WRITE_CACHE,
+.global_mem_size = 4,
+.max_constant_buffer_size = 64 << 10,
+.max_constant_args = 8,
+.error_correction_support = CL_FALSE,
+.host_unified_memory = CL_FALSE,
+.profiling_timer_resolution = 80, /* ns */
+.endian_little = CL_TRUE,
+.available = CL_TRUE,
+.compiler_available = CL_FALSE, /* XXX */
+.execution_capabilities = CL_EXEC_KERNEL,
+.queue_properties = CL_QUEUE_PROFILING_ENABLE,
+.platform = NULL, /* == intel_platform (set when requested) */
+
+#define DECL_INFO_STRING(FIELD, STRING) \
+ .FIELD = STRING, \
+ .JOIN(FIELD,_sz) = sizeof(STRING), /* sizeof() already counts the terminating NUL */
+DECL_INFO_STRING(name, "Intel HD Graphics Family")
+DECL_INFO_STRING(vendor, "Intel")
+DECL_INFO_STRING(version, "OpenCL 1.1")
+DECL_INFO_STRING(profile, "FULL_PROFILE")
+DECL_INFO_STRING(opencl_c_version, "OpenCL C 1.1")
+DECL_INFO_STRING(extensions, "")
+#undef DECL_INFO_STRING
+
+
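For reference (not part of the upstream file): after preprocessing, the first DECL_INFO_STRING line above boils down to a string plus its byte size, which is what the DECL_STRING_FIELD queries in cl_device_id.c copy out. A self-contained equivalent, with an illustrative struct name:

#include <stddef.h>

struct info_example { const char *name; size_t name_sz; };

static const struct info_example example = {
  .name = "Intel HD Graphics Family",
  .name_sz = sizeof("Intel HD Graphics Family"),  /* 25 bytes, NUL included */
};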
diff --git a/src/cl_image.c b/src/cl_image.c
new file mode 100644
index 0000000..bba741d
--- /dev/null
+++ b/src/cl_image.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "cl_image.h"
+#include "cl_utils.h"
+#include "intel/intel_defines.h"
+
+#include <assert.h>
+
+LOCAL cl_int
+cl_image_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp)
+{
+ assert(bpp);
+
+ const uint32_t type = fmt->image_channel_data_type;
+ const uint32_t order = fmt->image_channel_order;
+ switch (type) {
+#define DECL_BPP(DATA_TYPE, VALUE) case DATA_TYPE: *bpp = VALUE;
+ DECL_BPP(CL_SNORM_INT8, 1); break;
+ DECL_BPP(CL_SNORM_INT16, 2); break;
+ DECL_BPP(CL_UNORM_INT8, 1); break;
+ DECL_BPP(CL_UNORM_INT16, 2); break;
+ DECL_BPP(CL_UNORM_SHORT_565, 2);
+ if (order != CL_RGBx && order != CL_RGB)
+ return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ break;
+ DECL_BPP(CL_UNORM_SHORT_555, 2);
+ if (order != CL_RGBx && order != CL_RGB)
+ return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ break;
+ DECL_BPP(CL_UNORM_INT_101010, 4);
+ if (order != CL_RGBx && order != CL_RGB)
+ return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ break;
+ DECL_BPP(CL_SIGNED_INT8, 1); break;
+ DECL_BPP(CL_SIGNED_INT16, 2); break;
+ DECL_BPP(CL_SIGNED_INT32, 4); break;
+ DECL_BPP(CL_UNSIGNED_INT8, 1); break;
+ DECL_BPP(CL_UNSIGNED_INT16, 2); break;
+ DECL_BPP(CL_UNSIGNED_INT32, 4); break;
+ DECL_BPP(CL_HALF_FLOAT, 2); break;
+ DECL_BPP(CL_FLOAT, 4); break;
+#undef DECL_BPP
+ default: return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ };
+
+ switch (order) {
+ case CL_R: break;
+ case CL_A: break;
+ case CL_RA: *bpp *= 2; break;
+ case CL_RG: *bpp *= 2; break;
+ case CL_Rx: *bpp *= 2; break;
+ case CL_INTENSITY:
+ case CL_LUMINANCE:
+ if (type != CL_UNORM_INT8 && type != CL_UNORM_INT16 &&
+ type != CL_SNORM_INT8 && type != CL_SNORM_INT16 &&
+ type != CL_HALF_FLOAT && type != CL_FLOAT)
+ return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ break;
+ case CL_RGB:
+ case CL_RGBx:
+ if (type != CL_UNORM_SHORT_555 &&
+ type != CL_UNORM_SHORT_565 &&
+ type != CL_UNORM_INT_101010)
+ return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ break;
+ case CL_RGBA: *bpp *= 4; break;
+ case CL_ARGB:
+ case CL_BGRA:
+ if (type != CL_UNORM_INT8 && type != CL_SIGNED_INT8 &&
+ type != CL_SNORM_INT8 && type != CL_UNSIGNED_INT8)
+ return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ *bpp *= 4;
+ break;
+ default: return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ };
+
+ return CL_SUCCESS;
+}
+
+LOCAL uint32_t
+cl_image_get_intel_format(const cl_image_format *fmt)
+{
+ const uint32_t type = fmt->image_channel_data_type;
+ const uint32_t order = fmt->image_channel_order;
+ switch (order) {
+ case CL_R:
+ case CL_A:
+ case CL_INTENSITY:
+ case CL_LUMINANCE:
+ switch (type) {
+ case CL_HALF_FLOAT: return I965_SURFACEFORMAT_R16_FLOAT;
+ case CL_FLOAT: return I965_SURFACEFORMAT_R32_FLOAT;
+ case CL_SNORM_INT16: return I965_SURFACEFORMAT_R16_SNORM;
+ case CL_SNORM_INT8: return I965_SURFACEFORMAT_R8_SNORM;
+ case CL_UNORM_INT8: return I965_SURFACEFORMAT_R8_UNORM;
+ case CL_UNORM_INT16: return I965_SURFACEFORMAT_R16_UNORM;
+ case CL_SIGNED_INT8: return I965_SURFACEFORMAT_R8_SINT;
+ case CL_SIGNED_INT16: return I965_SURFACEFORMAT_R16_SINT;
+ case CL_SIGNED_INT32: return I965_SURFACEFORMAT_R32_SINT;
+ case CL_UNSIGNED_INT8: return I965_SURFACEFORMAT_R8_UINT;
+ case CL_UNSIGNED_INT16: return I965_SURFACEFORMAT_R16_UINT;
+ case CL_UNSIGNED_INT32: return I965_SURFACEFORMAT_R32_UINT;
+ default: return INTEL_UNSUPPORTED_FORMAT;
+ };
+ case CL_RG:
+ case CL_RA:
+ case CL_Rx:
+ switch (type) {
+ case CL_HALF_FLOAT: return I965_SURFACEFORMAT_R16G16_FLOAT;
+ case CL_FLOAT: return I965_SURFACEFORMAT_R32G32_FLOAT;
+ case CL_SNORM_INT16: return I965_SURFACEFORMAT_R16G16_SNORM;
+ case CL_SNORM_INT8: return I965_SURFACEFORMAT_R8G8_SNORM;
+ case CL_UNORM_INT8: return I965_SURFACEFORMAT_R8G8_UNORM;
+ case CL_UNORM_INT16: return I965_SURFACEFORMAT_R16G16_UNORM;
+ case CL_SIGNED_INT8: return I965_SURFACEFORMAT_R8G8_SINT;
+ case CL_SIGNED_INT16: return I965_SURFACEFORMAT_R16G16_SINT;
+ case CL_SIGNED_INT32: return I965_SURFACEFORMAT_R32G32_SINT;
+ case CL_UNSIGNED_INT8: return I965_SURFACEFORMAT_R8G8_UINT;
+ case CL_UNSIGNED_INT16: return I965_SURFACEFORMAT_R16G16_UINT;
+ case CL_UNSIGNED_INT32: return I965_SURFACEFORMAT_R32G32_UINT;
+ default: return INTEL_UNSUPPORTED_FORMAT;
+ };
+ case CL_RGB:
+ case CL_RGBx:
+ switch (type) {
+ case CL_UNORM_INT_101010: return I965_SURFACEFORMAT_R10G10B10A2_UNORM;
+ case CL_UNORM_SHORT_565:
+ case CL_UNORM_SHORT_555:
+ default: return INTEL_UNSUPPORTED_FORMAT;
+ };
+ case CL_RGBA:
+ switch (type) {
+ case CL_HALF_FLOAT: return I965_SURFACEFORMAT_R16G16B16A16_FLOAT;
+ case CL_FLOAT: return I965_SURFACEFORMAT_R32G32B32A32_FLOAT;
+ case CL_SNORM_INT16: return I965_SURFACEFORMAT_R16G16B16A16_SNORM;
+ case CL_SNORM_INT8: return I965_SURFACEFORMAT_R8G8B8A8_SNORM;
+ case CL_UNORM_INT8: return I965_SURFACEFORMAT_R8G8B8A8_UNORM;
+ case CL_UNORM_INT16: return I965_SURFACEFORMAT_R16G16B16A16_UNORM;
+ case CL_SIGNED_INT8: return I965_SURFACEFORMAT_R8G8B8A8_SINT;
+ case CL_SIGNED_INT16: return I965_SURFACEFORMAT_R16G16B16A16_SINT;
+ case CL_SIGNED_INT32: return I965_SURFACEFORMAT_R32G32B32A32_SINT;
+ case CL_UNSIGNED_INT8: return I965_SURFACEFORMAT_R8G8B8A8_UINT;
+ case CL_UNSIGNED_INT16: return I965_SURFACEFORMAT_R16G16B16A16_UINT;
+ case CL_UNSIGNED_INT32: return I965_SURFACEFORMAT_R32G32B32A32_UINT;
+ default: return INTEL_UNSUPPORTED_FORMAT;
+ };
+ case CL_ARGB: return INTEL_UNSUPPORTED_FORMAT;
+ case CL_BGRA:
+ switch (type) {
+ case CL_UNORM_INT8: return I965_SURFACEFORMAT_B8G8R8A8_UNORM;
+ default: return INTEL_UNSUPPORTED_FORMAT;
+ };
+ default: return INTEL_UNSUPPORTED_FORMAT;
+ };
+}
+
+static const uint32_t cl_image_order[] = {
+ CL_R, CL_A, CL_RG, CL_RA, CL_RGB, CL_RGBA, CL_BGRA, CL_ARGB,
+ CL_INTENSITY, CL_LUMINANCE, CL_Rx, CL_RGx, CL_RGBx
+};
+
+static const uint32_t cl_image_type[] = {
+ CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16,
+ CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010,
+ CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32,
+ CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32,
+ CL_HALF_FLOAT, CL_FLOAT
+};
+
+static const size_t cl_image_order_n = SIZEOF32(cl_image_order);
+static const size_t cl_image_type_n = SIZEOF32(cl_image_type);
+
+cl_int
+cl_image_get_supported_fmt(cl_context ctx,
+ cl_mem_type image_type,
+ cl_uint num_entries,
+ cl_image_format *image_formats,
+ cl_uint *num_image_formats)
+{
+ size_t i, j, n = 0;
+ /* image_formats may be NULL when the caller only queries the count */
+ for (i = 0; i < cl_image_order_n; ++i)
+ for (j = 0; j < cl_image_type_n; ++j) {
+ const cl_image_format fmt = {
+ .image_channel_order = cl_image_order[i],
+ .image_channel_data_type = cl_image_type[j]
+ };
+ const uint32_t intel_fmt = cl_image_get_intel_format(&fmt);
+ if (intel_fmt == INTEL_UNSUPPORTED_FORMAT)
+ continue;
+ if (n < num_entries && image_formats) image_formats[n] = fmt;
+ n++;
+ }
+ if (num_image_formats) *num_image_formats = n;
+ return CL_SUCCESS;
+}
+
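A usage sketch, not from the upstream sources: the usual two-call enumeration against cl_image_get_supported_fmt, followed by a bytes-per-pixel query. The helper name is illustrative, and note that the context and image_type arguments are currently unused by the implementation above:

#include <stdlib.h>
#include "cl_image.h"

static void example_enumerate_formats(cl_context ctx)
{
  cl_uint n = 0;
  uint32_t bpp = 0;

  /* First call: only ask how many formats are supported */
  cl_image_get_supported_fmt(ctx, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &n);

  cl_image_format *fmts = calloc(n, sizeof(*fmts));
  if (fmts == NULL)
    return;

  /* Second call: fetch the list itself */
  cl_image_get_supported_fmt(ctx, CL_MEM_OBJECT_IMAGE2D, n, fmts, NULL);

  /* e.g. a CL_RGBA / CL_UNORM_INT8 format is 4 bytes per pixel */
  if (n > 0 && cl_image_byte_per_pixel(&fmts[0], &bpp) == CL_SUCCESS)
    (void) bpp;

  free(fmts);
}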
diff --git a/src/cl_image.h b/src/cl_image.h
new file mode 100644
index 0000000..4afb4d4
--- /dev/null
+++ b/src/cl_image.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __CL_IMAGE_H__
+#define __CL_IMAGE_H__
+
+#include "cl_internals.h"
+#include "CL/cl.h"
+#include <stdint.h>
+
+/* Returned when the OCL format is not supported */
+#define INTEL_UNSUPPORTED_FORMAT ((uint32_t) ~0x0u)
+
+/* Compute the number of bytes per pixel if the format is supported */
+extern cl_int cl_image_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp);
+
+/* Return the intel format for the given OCL format */
+extern uint32_t cl_image_get_intel_format(const cl_image_format *fmt);
+
+/* Return the list of formats supported by the API */
+extern cl_int cl_image_get_supported_fmt(cl_context context,
+ cl_mem_type image_type,
+ cl_uint num_entries,
+ cl_image_format *image_formats,
+ cl_uint *num_image_formats);
+
+#endif /* __CL_IMAGE_H__ */
+
diff --git a/src/cl_internals.h b/src/cl_internals.h
new file mode 100644
index 0000000..b2b25b2
--- /dev/null
+++ b/src/cl_internals.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __CL_INTERNALS_H__
+#define __CL_INTERNALS_H__
+
+/* We put a magic header at the start of each object. This makes it easy to
+ * detect objects that are wrongly used through the API
+ */
+#define CL_MAGIC_KERNEL_HEADER 0x1234567890abcdefLL
+#define CL_MAGIC_CONTEXT_HEADER 0x0ab123456789cdefLL
+#define CL_MAGIC_PROGRAM_HEADER 0x34560ab12789cdefLL
+#define CL_MAGIC_QUEUE_HEADER 0x83650a12b79ce4dfLL
+#define CL_MAGIC_SAMPLER_HEADER 0x686a0ecba79ce33fLL
+#define CL_MAGIC_MEM_HEADER 0x381a27b9ce6504dfLL
+#define CL_MAGIC_DEAD_HEADER 0xdeaddeaddeaddeadLL
+
+#endif /* __CL_INTERNALS_H__ */
+
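An illustrative sketch (not part of the upstream header) of how these magic values are used: each public object stores a 64-bit magic as its first field, and API entry points check it before trusting the pointer, as cl_kernel_set_arg does below for cl_mem. The helper name is an assumption:

#include "cl_kernel.h"

static int example_is_valid_kernel(cl_kernel k)
{
  return k != NULL && k->magic == CL_MAGIC_KERNEL_HEADER;
}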
diff --git a/src/cl_kernel.c b/src/cl_kernel.c
new file mode 100644
index 0000000..757ee42
--- /dev/null
+++ b/src/cl_kernel.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "cl_kernel.h"
+#include "cl_program.h"
+#include "cl_device_id.h"
+#include "cl_context.h"
+#include "cl_mem.h"
+#include "cl_alloc.h"
+#include "cl_utils.h"
+#include "CL/cl.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <assert.h>
+
+LOCAL void
+cl_kernel_delete(cl_kernel k)
+{
+ uint32_t i;
+ if (k == NULL) return;
+
+ /* Someone else still holds a reference: we are not done with the kernel */
+ if (atomic_dec(&k->ref_n) > 1) return;
+ /* Release one reference on all bos we own */
+ if (k->bo) cl_buffer_unreference(k->bo);
+ if (k->const_bo) cl_buffer_unreference(k->const_bo);
+ /* This will be true for kernels created by clCreateKernel */
+ if (k->ref_its_program) cl_program_delete(k->program);
+ /* Release the curbe if allocated */
+ if (k->curbe) cl_free(k->curbe);
+ /* Release the argument array if required */
+ if (k->args) {
+ for (i = 0; i < k->arg_n; ++i)
+ if (k->args[i].mem != NULL)
+ cl_mem_delete(k->args[i].mem);
+ cl_free(k->args);
+ }
+ k->magic = CL_MAGIC_DEAD_HEADER; /* For safety */
+ cl_free(k);
+}
+
+LOCAL cl_kernel
+cl_kernel_new(cl_program p)
+{
+ cl_kernel k = NULL;
+ TRY_ALLOC_NO_ERR (k, CALLOC(struct _cl_kernel));
+ k->ref_n = 1;
+ k->magic = CL_MAGIC_KERNEL_HEADER;
+ k->program = p;
+
+exit:
+ return k;
+error:
+ cl_kernel_delete(k);
+ k = NULL;
+ goto exit;
+}
+
+LOCAL const char*
+cl_kernel_get_name(cl_kernel k)
+{
+ if (UNLIKELY(k == NULL)) return NULL;
+ return gbe_kernel_get_name(k->opaque);
+}
+
+LOCAL void
+cl_kernel_add_ref(cl_kernel k)
+{
+ atomic_inc(&k->ref_n);
+}
+
+LOCAL cl_int
+cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value)
+{
+ uint32_t offset; /* where to patch */
+ enum gbe_arg_type arg_type; /* kind of argument */
+ size_t arg_sz; /* size of the argument */
+ cl_mem mem; /* for __global, __constant and image arguments */
+
+ if (UNLIKELY(index >= k->arg_n))
+ return CL_INVALID_ARG_INDEX;
+ arg_type = gbe_kernel_get_arg_type(k->opaque, index);
+ arg_sz = gbe_kernel_get_arg_size(k->opaque, index);
+ if (UNLIKELY(arg_type != GBE_ARG_LOCAL_PTR && arg_sz != sz))
+ return CL_INVALID_ARG_SIZE;
+
+ /* Copy the structure or the value directly into the curbe */
+ if (arg_type == GBE_ARG_VALUE) {
+ if (UNLIKELY(value == NULL))
+ return CL_INVALID_KERNEL_ARGS;
+ offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index);
+ assert(offset + sz <= k->curbe_sz);
+ memcpy(k->curbe + offset, value, sz);
+ k->args[index].local_sz = 0;
+ k->args[index].is_set = 1;
+ k->args[index].mem = NULL;
+ return CL_SUCCESS;
+ }
+
+ /* For a local pointer just save the size */
+ if (arg_type == GBE_ARG_LOCAL_PTR) {
+ if (UNLIKELY(value != NULL))
+ return CL_INVALID_KERNEL_ARGS;
+ k->args[index].local_sz = sz;
+ k->args[index].is_set = 1;
+ k->args[index].mem = NULL;
+ return CL_SUCCESS;
+ }
+
+ /* Otherwise, we just need to check that this is a buffer */
+ if (UNLIKELY(value == NULL))
+ return CL_INVALID_KERNEL_ARGS;
+ mem = *(cl_mem*) value;
+ if (UNLIKELY(mem->magic != CL_MAGIC_MEM_HEADER))
+ return CL_INVALID_ARG_VALUE;
+ if (mem->is_image)
+ if (UNLIKELY(arg_type != GBE_ARG_IMAGE)) /* an image object only fits an image argument */
+ return CL_INVALID_ARG_VALUE;
+ cl_mem_add_ref(mem);
+ if (k->args[index].mem)
+ cl_mem_delete(k->args[index].mem);
+ k->args[index].mem = mem;
+ k->args[index].is_set = 1;
+ k->args[index].local_sz = 0;
+
+ return CL_SUCCESS;
+}
+
+LOCAL uint32_t
+cl_kernel_get_simd_width(cl_kernel k)
+{
+ assert(k != NULL);
+ return gbe_kernel_get_simd_width(k->opaque);
+}
+
+LOCAL void
+cl_kernel_setup(cl_kernel k, gbe_kernel opaque)
+{
+ cl_context ctx = k->program->ctx;
+ cl_buffer_mgr bufmgr = cl_context_get_bufmgr(ctx);
+
+ /* Allocate the gen code here */
+ const uint32_t code_sz = gbe_kernel_get_code_size(opaque);
+ const char *code = gbe_kernel_get_code(opaque);
+ k->bo = cl_buffer_alloc(bufmgr, "CL kernel", code_sz, 64u);
+ k->arg_n = gbe_kernel_get_arg_num(opaque);
+
+ /* Upload the code */
+ cl_buffer_subdata(k->bo, 0, code_sz, code);
+ k->opaque = opaque;
+
+ /* Create the curbe */
+ k->curbe_sz = gbe_kernel_get_curbe_size(k->opaque);
+}
+
+LOCAL cl_kernel
+cl_kernel_dup(cl_kernel from)
+{
+ cl_kernel to = NULL;
+
+ if (UNLIKELY(from == NULL))
+ return NULL;
+ TRY_ALLOC_NO_ERR (to, CALLOC(struct _cl_kernel));
+ to->bo = from->bo;
+ to->const_bo = from->const_bo;
+ to->opaque = from->opaque;
+ to->ref_n = 1;
+ to->magic = CL_MAGIC_KERNEL_HEADER;
+ to->program = from->program;
+ to->arg_n = from->arg_n;
+ to->curbe_sz = from->curbe_sz;
+ TRY_ALLOC_NO_ERR(to->args, cl_calloc(to->arg_n, sizeof(cl_argument)));
+ if (to->curbe_sz) TRY_ALLOC_NO_ERR(to->curbe, cl_calloc(1, to->curbe_sz));
+
+ /* Retain the bos */
+ if (from->bo) cl_buffer_reference(from->bo);
+ if (from->const_bo) cl_buffer_reference(from->const_bo);
+
+ /* We retain the program destruction since this kernel (user allocated)
+ * depends on the program for some of its pointers
+ */
+ assert(from->program);
+ cl_program_add_ref(from->program);
+ to->ref_its_program = CL_TRUE;
+
+exit:
+ return to;
+error:
+ cl_kernel_delete(to);
+ to = NULL;
+ goto exit;
+}
+
+LOCAL cl_int
+cl_kernel_work_group_sz(cl_kernel ker,
+ const size_t *local_wk_sz,
+ uint32_t wk_dim,
+ size_t *wk_grp_sz)
+{
+ cl_int err = CL_SUCCESS;
+ size_t sz = 0;
+ cl_uint i;
+
+ for (i = 0; i < wk_dim; ++i) {
+ const uint32_t required_sz = gbe_kernel_get_required_work_group_size(ker->opaque, i);
+ if (required_sz != 0 && required_sz != local_wk_sz[i]) {
+ err = CL_INVALID_WORK_ITEM_SIZE;
+ goto error;
+ }
+ }
+ sz = local_wk_sz[0];
+ for (i = 1; i < wk_dim; ++i)
+ sz *= local_wk_sz[i];
+ FATAL_IF (sz % 16, "Work group size must be a multiple of 16");
+ if (sz > ker->program->ctx->device->max_work_group_size) {
+ err = CL_INVALID_WORK_ITEM_SIZE;
+ goto error;
+ }
+
+error:
+ if (wk_grp_sz) *wk_grp_sz = sz;
+ return err;
+}
+
+
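A usage sketch, not from the upstream sources, exercising the three argument kinds handled by cl_kernel_set_arg and then validating a local size with cl_kernel_work_group_sz. It assumes a kernel whose first three arguments are a __global buffer, an int passed by value and a __local pointer; the helper name is illustrative:

#include "cl_kernel.h"

static cl_int example_launch_setup(cl_kernel ker, cl_mem buf)
{
  const int scalar = 42;
  const size_t local_sz[3] = {16, 4, 1};  /* 64 work items: a multiple of 16 */
  size_t wg = 0;
  cl_int err;

  err = cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &buf);     /* __global buffer */
  if (err != CL_SUCCESS) return err;
  err = cl_kernel_set_arg(ker, 1, sizeof(scalar), &scalar);  /* by-value argument */
  if (err != CL_SUCCESS) return err;
  err = cl_kernel_set_arg(ker, 2, 1024, NULL);               /* __local pointer, 1KB requested */
  if (err != CL_SUCCESS) return err;

  return cl_kernel_work_group_sz(ker, local_sz, 3, &wg);     /* wg == 64 on success */
}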
diff --git a/src/cl_kernel.h b/src/cl_kernel.h
new file mode 100644
index 0000000..0347d0a
--- /dev/null
+++ b/src/cl_kernel.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __CL_KERNEL_H__
+#define __CL_KERNEL_H__
+
+#include "cl_internals.h"
+#include "cl_driver.h"
+#include "program.h"
+#include "CL/cl.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+/* This is the kernel as exposed by the compiler interface */
+struct _gbe_kernel;
+
+/* We need to save buffer data for relocation and binding and we must figure out
+ * if all arguments are properly set
+ */
+typedef struct cl_argument {
+ cl_mem mem; /* For image and regular buffers */
+ uint32_t local_sz:31; /* For __local size specification */
+ uint32_t is_set:1; /* All args must be set before NDRange */
+} cl_argument;
+
+/* One OCL function */
+struct _cl_kernel {
+ uint64_t magic; /* To identify it as a kernel */
+ volatile int ref_n; /* We reference count this object */
+ cl_buffer bo; /* The code itself */
+  cl_buffer const_bo;       /* Buffer for all __constant values in the OCL program */
+ cl_program program; /* Owns this structure (and pointers) */
+ gbe_kernel opaque; /* (Opaque) compiler structure for the OCL kernel */
+ char *curbe; /* One curbe per kernel */
+ size_t curbe_sz; /* Size of it */
+ cl_argument *args; /* To track argument setting */
+ uint32_t arg_n:31; /* Number of arguments */
+ uint32_t ref_its_program:1; /* True only for the user kernel (created by clCreateKernel) */
+};
+
+/* Allocate an empty kernel */
+extern cl_kernel cl_kernel_new(cl_program);
+
+/* Destroy and deallocate an empty kernel */
+extern void cl_kernel_delete(cl_kernel);
+
+/* Setup the kernel with the given GBE Kernel */
+extern void cl_kernel_setup(cl_kernel k, gbe_kernel opaque);
+
+/* Get the kernel name */
+extern const char *cl_kernel_get_name(cl_kernel k);
+
+/* Get the simd width as used in the code */
+extern uint32_t cl_kernel_get_simd_width(cl_kernel k);
+
+/* When a kernel is created from outside, we just duplicate the structure we
+ * have internally and give it back to the user
+ */
+extern cl_kernel cl_kernel_dup(cl_kernel);
+
+/* Add one more reference on the kernel object */
+extern void cl_kernel_add_ref(cl_kernel);
+
+/* Set the argument before kernel execution */
+extern int cl_kernel_set_arg(cl_kernel,
+ uint32_t arg_index,
+ size_t arg_size,
+ const void *arg_value);
+
+/* Compute and check the work group size from the user provided local size */
+extern cl_int
+cl_kernel_work_group_sz(cl_kernel ker,
+ const size_t *local_wk_sz,
+ cl_uint wk_dim,
+ size_t *wk_grp_sz);
+
+#endif /* __CL_KERNEL_H__ */
+
diff --git a/src/cl_mem.c b/src/cl_mem.c
new file mode 100644
index 0000000..a0c4d41
--- /dev/null
+++ b/src/cl_mem.c
@@ -0,0 +1,393 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "cl_mem.h"
+#include "cl_image.h"
+#include "cl_context.h"
+#include "cl_utils.h"
+#include "cl_alloc.h"
+#include "cl_device_id.h"
+#include "cl_driver.h"
+
+#include "CL/cl.h"
+#include "CL/cl_intel.h"
+#include <assert.h>
+#include <stdio.h>
+
+static cl_mem
+cl_mem_allocate(cl_context ctx,
+ cl_mem_flags flags,
+ size_t sz,
+ cl_int is_tiled,
+ cl_int *errcode)
+{
+ cl_buffer_mgr bufmgr = NULL;
+ cl_mem mem = NULL;
+ cl_int err = CL_SUCCESS;
+ size_t alignment = 64;
+
+ assert(ctx);
+ FATAL_IF (flags & CL_MEM_ALLOC_HOST_PTR,
+ "CL_MEM_ALLOC_HOST_PTR unsupported"); /* XXX */
+ FATAL_IF (flags & CL_MEM_USE_HOST_PTR,
+ "CL_MEM_USE_HOST_PTR unsupported"); /* XXX */
+ if (UNLIKELY(sz == 0)) {
+ err = CL_INVALID_BUFFER_SIZE;
+ goto error;
+ }
+
+  /* Allocate and initialize the structure itself */
+ TRY_ALLOC (mem, CALLOC(struct _cl_mem));
+ mem->ref_n = 1;
+ mem->magic = CL_MAGIC_MEM_HEADER;
+ mem->flags = flags;
+
+ /* Pinning will require stricter alignment rules */
+ if ((flags & CL_MEM_PINNABLE) || is_tiled)
+ alignment = 4096;
+
+ /* Allocate space in memory */
+ bufmgr = cl_context_get_bufmgr(ctx);
+ assert(bufmgr);
+ mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
+ if (UNLIKELY(mem->bo == NULL)) {
+ err = CL_MEM_ALLOCATION_FAILURE;
+ goto error;
+ }
+
+ /* Append the buffer in the context buffer list */
+ pthread_mutex_lock(&ctx->buffer_lock);
+ mem->next = ctx->buffers;
+ if (ctx->buffers != NULL)
+ ctx->buffers->prev = mem;
+ ctx->buffers = mem;
+ pthread_mutex_unlock(&ctx->buffer_lock);
+ mem->ctx = ctx;
+ cl_context_add_ref(ctx);
+
+exit:
+ if (errcode)
+ *errcode = err;
+ return mem;
+error:
+ cl_mem_delete(mem);
+ mem = NULL;
+ goto exit;
+
+}
+
+LOCAL cl_mem
+cl_mem_new(cl_context ctx,
+ cl_mem_flags flags,
+ size_t sz,
+ void *data,
+ cl_int *errcode_ret)
+{
+ cl_int err = CL_SUCCESS;
+ cl_mem mem = NULL;
+
+ /* Check flags consistency */
+ if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR && data == NULL)) {
+ err = CL_INVALID_HOST_PTR;
+ goto error;
+ }
+
+ /* Create the buffer in video memory */
+ mem = cl_mem_allocate(ctx, flags, sz, CL_FALSE, &err);
+ if (mem == NULL || err != CL_SUCCESS)
+ goto error;
+
+ /* Copy the data if required */
+ if (flags & CL_MEM_COPY_HOST_PTR) /* TODO check other flags too */
+ cl_buffer_subdata(mem->bo, 0, sz, data);
+
+exit:
+ if (errcode_ret)
+ *errcode_ret = err;
+ return mem;
+error:
+ cl_mem_delete(mem);
+ mem = NULL;
+ goto exit;
+}
+
+static void
+cl_mem_copy_data_linear(cl_mem mem,
+ size_t w,
+ size_t h,
+ size_t pitch,
+ uint32_t bpp,
+ void *data)
+{
+ size_t x, y, p;
+ char *dst;
+ cl_buffer_map(mem->bo, 1);
+ dst = cl_buffer_get_virtual(mem->bo);
+ for (y = 0; y < h; ++y) {
+ char *src = (char*) data + pitch * y;
+ for (x = 0; x < w; ++x) {
+ for (p = 0; p < bpp; ++p)
+ dst[p] = src[p];
+ dst += bpp;
+ src += bpp;
+ }
+ }
+ cl_buffer_unmap(mem->bo);
+}
+
+static const uint32_t tile_sz = 4096; /* 4KB per tile */
+static const uint32_t tilex_w = 512; /* tileX width in bytes */
+static const uint32_t tilex_h = 8; /* tileX height in number of rows */
+static const uint32_t tiley_w = 128; /* tileY width in bytes */
+static const uint32_t tiley_h = 32; /* tileY height in number of rows */
+
+static void
+cl_mem_copy_data_tilex(cl_mem mem,
+ size_t w,
+ size_t h,
+ size_t pitch,
+ uint32_t bpp,
+ void *data)
+{
+ const size_t tile_w = tilex_w;
+ const size_t tile_h = tilex_h;
+ const size_t aligned_pitch = ALIGN(w * bpp, tile_w);
+ const size_t aligned_height = ALIGN(h, tile_h);
+ const size_t tilex_n = aligned_pitch / tile_w;
+ const size_t tiley_n = aligned_height / tile_h;
+ size_t x, y, tilex, tiley;
+ char *img = NULL;
+ char *end = (char*) data + pitch * h;
+
+ cl_buffer_map(mem->bo, 1);
+ img = cl_buffer_get_virtual(mem->bo);
+ for (tiley = 0; tiley < tiley_n; ++tiley)
+ for (tilex = 0; tilex < tilex_n; ++tilex) {
+ char *tile = img + (tilex + tiley * tilex_n) * tile_sz;
+ for (y = 0; y < tile_h; ++y) {
+ char *src = (char*) data + (tiley*tile_h+y)*pitch + tilex*tile_w;
+ char *dst = tile + y*tile_w;
+ for (x = 0; x < tile_w; ++x, ++dst, ++src) {
+ if ((uintptr_t) src < (uintptr_t) end)
+ *dst = *src;
+ }
+ }
+ }
+ cl_buffer_unmap(mem->bo);
+}
+
+static void
+cl_mem_copy_data_tiley(cl_mem mem,
+ size_t w,
+ size_t h,
+ size_t pitch,
+ uint32_t bpp,
+ void *data)
+{
+ const size_t tile_w = tiley_w;
+ const size_t tile_h = tiley_h;
+ const size_t aligned_pitch = ALIGN(w * bpp, tile_w);
+ const size_t aligned_height = ALIGN(h, tile_h);
+ const size_t tilex_n = aligned_pitch / tile_w;
+ const size_t tiley_n = aligned_height / tile_h;
+ size_t x, y, tilex, tiley, byte;
+ char *img = NULL;
+ char *end = (char*) data + pitch * h;
+
+ cl_buffer_map(mem->bo, 1);
+ img = cl_buffer_get_virtual(mem->bo);
+ for (tiley = 0; tiley < tiley_n; ++tiley)
+ for (tilex = 0; tilex < tilex_n; ++tilex) {
+ char *tile = img + (tiley * tilex_n + tilex) * tile_sz;
+ for (x = 0; x < tile_w; x += 16) {
+ char *src = (char*) data + tiley*tile_h*pitch + tilex*tile_w+x;
+ char *dst = tile + x*tile_h;
+ for (y = 0; y < tile_h; ++y, dst += 16, src += pitch) {
+ for (byte = 0; byte < 16; ++byte)
+ if ((uintptr_t) src + byte < (uintptr_t) end)
+ dst[byte] = src[byte];
+ }
+ }
+ }
+ cl_buffer_unmap(mem->bo);
+}
+
+LOCAL cl_mem
+cl_mem_new_image2D(cl_context ctx,
+ cl_mem_flags flags,
+ const cl_image_format *fmt,
+ size_t w,
+ size_t h,
+ size_t pitch,
+ void *data,
+ cl_int *errcode_ret)
+{
+ cl_int err = CL_SUCCESS;
+ cl_mem mem = NULL;
+ uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT;
+ size_t sz = 0, aligned_pitch = 0, aligned_h;
+ cl_image_tiling_t tiling = CL_NO_TILE;
+
+ /* Check flags consistency */
+ if (UNLIKELY((flags & CL_MEM_COPY_HOST_PTR) && data == NULL)) {
+ err = CL_INVALID_HOST_PTR;
+ goto error;
+ }
+
+ /* Get the size of each pixel */
+ if (UNLIKELY((err = cl_image_byte_per_pixel(fmt, &bpp)) != CL_SUCCESS))
+ goto error;
+
+ /* Only a sub-set of the formats are supported */
+ intel_fmt = cl_image_get_intel_format(fmt);
+ if (UNLIKELY(intel_fmt == INTEL_UNSUPPORTED_FORMAT)) {
+ err = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ goto error;
+ }
+
+ /* See if the user parameters match */
+#define DO_IMAGE_ERROR \
+ do { \
+ err = CL_INVALID_IMAGE_SIZE; \
+ goto error; \
+ } while (0);
+ if (UNLIKELY(w == 0)) DO_IMAGE_ERROR;
+ if (UNLIKELY(h == 0)) DO_IMAGE_ERROR;
+ if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR;
+ if (UNLIKELY(h > ctx->device->image2d_max_height)) DO_IMAGE_ERROR;
+ if (UNLIKELY(bpp*w > pitch)) DO_IMAGE_ERROR;
+#undef DO_IMAGE_ERROR
+
+ /* Pick up tiling mode (we do only linear on SNB) */
+ if (cl_driver_get_ver(ctx->drv) != 6)
+ tiling = CL_TILE_Y;
+
+  /* Tiling requires aligning both pitch and height */
+ if (tiling == CL_NO_TILE) {
+ aligned_pitch = w * bpp;
+ aligned_h = h;
+ } else if (tiling == CL_TILE_X) {
+ aligned_pitch = ALIGN(w * bpp, tilex_w);
+ aligned_h = ALIGN(h, tilex_h);
+ } else if (tiling == CL_TILE_Y) {
+ aligned_pitch = ALIGN(w * bpp, tiley_w);
+ aligned_h = ALIGN(h, tiley_h);
+ }
+
+ sz = aligned_pitch * aligned_h;
+ mem = cl_mem_allocate(ctx, flags, sz, tiling != CL_NO_TILE, &err);
+ if (mem == NULL || err != CL_SUCCESS)
+ goto error;
+
+ /* Copy the data if required */
+ if (flags & CL_MEM_COPY_HOST_PTR) {
+ if (tiling == CL_NO_TILE)
+ cl_mem_copy_data_linear(mem, w, h, pitch, bpp, data);
+ else if (tiling == CL_TILE_X)
+ cl_mem_copy_data_tilex(mem, w, h, pitch, bpp, data);
+ else if (tiling == CL_TILE_Y)
+ cl_mem_copy_data_tiley(mem, w, h, pitch, bpp, data);
+ }
+
+ mem->w = w;
+ mem->h = h;
+ mem->fmt = *fmt;
+ mem->intel_fmt = intel_fmt;
+ mem->bpp = bpp;
+ mem->is_image = 1;
+ mem->pitch = aligned_pitch;
+ mem->tiling = tiling;
+
+exit:
+ if (errcode_ret)
+ *errcode_ret = err;
+ return mem;
+error:
+ cl_mem_delete(mem);
+ mem = NULL;
+ goto exit;
+}
+
+LOCAL void
+cl_mem_delete(cl_mem mem)
+{
+ if (UNLIKELY(mem == NULL))
+ return;
+ if (atomic_dec(&mem->ref_n) > 1)
+ return;
+ if (LIKELY(mem->bo != NULL))
+ cl_buffer_unreference(mem->bo);
+
+ /* Remove it from the list */
+ assert(mem->ctx);
+ pthread_mutex_lock(&mem->ctx->buffer_lock);
+ if (mem->prev)
+ mem->prev->next = mem->next;
+ if (mem->next)
+ mem->next->prev = mem->prev;
+  if (mem->ctx->buffers == mem)
+    mem->ctx->buffers = mem->next;
+ pthread_mutex_unlock(&mem->ctx->buffer_lock);
+ cl_context_delete(mem->ctx);
+
+ cl_free(mem);
+}
+
+LOCAL void
+cl_mem_add_ref(cl_mem mem)
+{
+ assert(mem);
+ atomic_inc(&mem->ref_n);
+}
+
+LOCAL void*
+cl_mem_map(cl_mem mem)
+{
+ cl_buffer_map(mem->bo, 1);
+ assert(cl_buffer_get_virtual(mem->bo));
+ return cl_buffer_get_virtual(mem->bo);
+}
+
+LOCAL cl_int
+cl_mem_unmap(cl_mem mem)
+{
+ cl_buffer_unmap(mem->bo);
+ return CL_SUCCESS;
+}
+
+LOCAL cl_int
+cl_mem_pin(cl_mem mem)
+{
+ assert(mem);
+ if (UNLIKELY((mem->flags & CL_MEM_PINNABLE) == 0))
+ return CL_INVALID_MEM;
+ cl_buffer_pin(mem->bo, 4096);
+ return CL_SUCCESS;
+}
+
+LOCAL cl_int
+cl_mem_unpin(cl_mem mem)
+{
+ assert(mem);
+ if (UNLIKELY((mem->flags & CL_MEM_PINNABLE) == 0))
+ return CL_INVALID_MEM;
+ cl_buffer_unpin(mem->bo);
+ return CL_SUCCESS;
+}
+
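The TileY copy above walks the destination surface in 16-byte wide columns of 32 rows inside 4KB tiles. The implied addressing can be summarized by the following sketch; tiley_offset is an illustrative helper, not part of the imported file.

    #include <stddef.h>

    /* Byte offset of image byte (bx, y) in a TileY surface whose pitch has
     * been aligned to tiley_w (128 bytes), matching cl_mem_copy_data_tiley. */
    static size_t
    tiley_offset(size_t bx, size_t y, size_t aligned_pitch)
    {
      const size_t tiles_per_row = aligned_pitch / 128;        /* tiley_w */
      const size_t tile = (y / 32) * tiles_per_row + bx / 128; /* tiley_h */
      const size_t ox = bx % 128, oy = y % 32;
      /* 4KB tile base + 16-byte column + row within the column + byte */
      return tile * 4096 + (ox & ~(size_t)15) * 32 + oy * 16 + (ox & 15);
    }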
diff --git a/src/cl_mem.h b/src/cl_mem.h
new file mode 100644
index 0000000..2cb983d
--- /dev/null
+++ b/src/cl_mem.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __CL_MEM_H__
+#define __CL_MEM_H__
+
+#include "cl_internals.h"
+#include "cl_driver.h"
+#include "CL/cl.h"
+
+typedef enum cl_image_tiling {
+ CL_NO_TILE = 0,
+ CL_TILE_X = 1,
+ CL_TILE_Y = 2
+} cl_image_tiling_t;
+
+/* Used for buffers and images */
+struct _cl_mem {
+ uint64_t magic; /* To identify it as a memory object */
+ volatile int ref_n; /* This object is reference counted */
+ cl_buffer bo; /* Data in GPU memory */
+ cl_mem prev, next; /* We chain the memory buffers together */
+ cl_context ctx; /* Context it belongs to */
+ cl_mem_flags flags; /* Flags specified at the creation time */
+ uint32_t is_image; /* Indicate if this is an image or not */
+ cl_image_format fmt; /* only for images */
+ size_t w,h,depth,pitch; /* only for images (depth is only for 3d images) */
+ uint32_t intel_fmt; /* format to provide in the surface state */
+ uint32_t bpp; /* number of bytes per pixel */
+ cl_image_tiling_t tiling; /* only IVB+ supports TILE_[X,Y] (image only) */
+};
+
+/* Create a new memory object and initialize it with possible user data */
+extern cl_mem cl_mem_new(cl_context, cl_mem_flags, size_t, void*, cl_int*);
+
+/* Idem but this is an image */
+extern cl_mem cl_mem_new_image2D(cl_context,
+ cl_mem_flags,
+ const cl_image_format*,
+ size_t w,
+ size_t h,
+ size_t pitch,
+ void *,
+ cl_int *);
+
+/* Unref the object and delete it if no more reference */
+extern void cl_mem_delete(cl_mem);
+
+/* Add one more reference to this object */
+extern void cl_mem_add_ref(cl_mem);
+
+/* Directly map a memory object */
+extern void *cl_mem_map(cl_mem);
+
+/* Unmap a memory object */
+extern cl_int cl_mem_unmap(cl_mem);
+
+/* Pin/unpin the buffer in memory (you must be root) */
+extern cl_int cl_mem_pin(cl_mem);
+extern cl_int cl_mem_unpin(cl_mem);
+
+#endif /* __CL_MEM_H__ */
+
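A minimal usage sketch of this interface (internal entry points only, error handling trimmed, and the context is assumed to already exist); not part of the imported file:

    #include <string.h>
    #include "cl_mem.h"

    static void
    fill_buffer_example(cl_context ctx)
    {
      cl_int err;
      cl_mem mem = cl_mem_new(ctx, 0, 4096, NULL, &err);
      if (mem == NULL || err != CL_SUCCESS)
        return;
      /* Map the buffer object, write through the CPU pointer, unmap. */
      unsigned char *ptr = cl_mem_map(mem);
      memset(ptr, 0, 4096);
      cl_mem_unmap(mem);
      cl_mem_delete(mem); /* drop our reference */
    }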
diff --git a/src/cl_platform_id.c b/src/cl_platform_id.c
new file mode 100644
index 0000000..3c95513
--- /dev/null
+++ b/src/cl_platform_id.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "cl_platform_id.h"
+#include "cl_internals.h"
+#include "cl_utils.h"
+#include "CL/cl.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+struct _cl_platform_id {
+ const char *profile;
+ const char *version;
+ const char *name;
+ const char *vendor;
+ const char *extensions;
+ size_t profile_sz;
+ size_t version_sz;
+ size_t name_sz;
+ size_t vendor_sz;
+ size_t extensions_sz;
+};
+
+#define DECL_INFO_STRING(FIELD, STRING) \
+ .FIELD = STRING, \
+  .JOIN(FIELD,_sz) = sizeof(STRING),
+
+static struct _cl_platform_id intel_platform_data = {
+ DECL_INFO_STRING(profile, "FULL_PROFILE")
+ DECL_INFO_STRING(version, "OpenCL 1.1")
+ DECL_INFO_STRING(name, "Experiment Intel Gen OCL Driver")
+ DECL_INFO_STRING(vendor, "Intel")
+ DECL_INFO_STRING(extensions, "")
+};
+
+#undef DECL_INFO_STRING
+
+/* Intel platform (only GPU now) */
+cl_platform_id const intel_platform = &intel_platform_data;
+
+LOCAL cl_int
+cl_get_platform_ids(cl_uint num_entries,
+ cl_platform_id * platforms,
+ cl_uint * num_platforms)
+{
+ if (num_platforms != NULL)
+ *num_platforms = 1;
+ if (UNLIKELY(platforms == NULL))
+ return CL_SUCCESS;
+ if (UNLIKELY(num_entries == 0))
+ return CL_INVALID_VALUE;
+ if (UNLIKELY(num_platforms == NULL && platforms == NULL))
+ return CL_SUCCESS;
+#if 0
+ if (UNLIKELY(num_platforms == NULL && platforms != NULL))
+ return CL_INVALID_VALUE;
+#endif
+ if (UNLIKELY(num_platforms != NULL && platforms == NULL))
+ return CL_INVALID_VALUE;
+
+ /* Easy right now, only one platform is supported */
+ *platforms = intel_platform;
+ return CL_SUCCESS;
+}
+
+#define DECL_FIELD(CASE,FIELD) \
+ case JOIN(CL_,CASE): \
+ if (param_value_size < intel_platform->JOIN(FIELD,_sz)) \
+ return CL_INVALID_VALUE; \
+ if (param_value_size_ret != NULL) \
+ *param_value_size_ret = intel_platform->JOIN(FIELD,_sz); \
+ memcpy(param_value, \
+ intel_platform->FIELD, \
+ intel_platform->JOIN(FIELD,_sz)); \
+ return CL_SUCCESS;
+
+LOCAL cl_int
+cl_get_platform_into(cl_platform_id platform,
+ cl_platform_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret)
+{
+ /* Only one platform. This is easy */
+ if (UNLIKELY(platform != NULL && platform != intel_platform))
+ return CL_INVALID_PLATFORM;
+ if (UNLIKELY(param_value == NULL))
+ return CL_INVALID_VALUE;
+
+  /* Fetch the platform information */
+ switch (param_name) {
+ DECL_FIELD (PLATFORM_PROFILE, profile);
+ DECL_FIELD (PLATFORM_VERSION, version);
+ DECL_FIELD (PLATFORM_NAME, name);
+ DECL_FIELD (PLATFORM_VENDOR, vendor);
+ DECL_FIELD (PLATFORM_EXTENSIONS, extensions);
+ default: return CL_INVALID_VALUE;
+ }
+}
+
+#undef DECL_FIELD
+
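For readability, the PLATFORM_NAME case generated by DECL_FIELD above expands roughly to:

    case CL_PLATFORM_NAME:
      if (param_value_size < intel_platform->name_sz)
        return CL_INVALID_VALUE;
      if (param_value_size_ret != NULL)
        *param_value_size_ret = intel_platform->name_sz;
      memcpy(param_value, intel_platform->name, intel_platform->name_sz);
      return CL_SUCCESS;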
diff --git a/src/cl_platform_id.h b/src/cl_platform_id.h
new file mode 100644
index 0000000..c096150
--- /dev/null
+++ b/src/cl_platform_id.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __CL_PLATFORM_ID_H__
+#define __CL_PLATFORM_ID_H__
+
+#include "cl_internals.h"
+#include "CL/cl.h"
+
+/* Platform implemented by this run-time */
+extern cl_platform_id const intel_platform;
+
+/* Return the valid platform */
+extern cl_int cl_get_platform_ids(cl_uint num_entries,
+ cl_platform_id * platforms,
+ cl_uint * num_platforms);
+
+/* Return information for the current platform */
+extern cl_int cl_get_platform_into(cl_platform_id platform,
+ cl_platform_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret);
+
+#endif /* __CL_PLATFORM_ID_H__ */
+
diff --git a/src/cl_program.c b/src/cl_program.c
new file mode 100644
index 0000000..42ef822
--- /dev/null
+++ b/src/cl_program.c
@@ -0,0 +1,330 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "cl_kernel.h"
+#include "cl_program.h"
+#include "cl_device_id.h"
+#include "cl_context.h"
+#include "cl_alloc.h"
+#include "cl_utils.h"
+#include "CL/cl.h"
+#include "CL/cl_intel.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+
+static void
+cl_program_release_sources(cl_program p)
+{
+ uint32_t i;
+ if (p->sources == NULL) return;
+ for (i = 0; i < p->src_n; ++i)
+ if (p->sources[i]) cl_free(p->sources[i]);
+ cl_free(p->sources);
+ p->sources = NULL;
+ p->src_n = 0;
+}
+
+LOCAL void
+cl_program_delete(cl_program p)
+{
+ uint32_t ref, i;
+
+ if (p == NULL)
+ return;
+
+ /* We are not done with it yet */
+ if ((ref = atomic_dec(&p->ref_n)) > 1) return;
+
+ /* Destroy the sources if still allocated */
+ cl_program_release_sources(p);
+
+ /* Remove it from the list */
+ assert(p->ctx);
+ pthread_mutex_lock(&p->ctx->program_lock);
+ if (p->prev)
+ p->prev->next = p->next;
+ if (p->next)
+ p->next->prev = p->prev;
+  if (p->ctx->programs == p)
+    p->ctx->programs = p->next;
+ pthread_mutex_unlock(&p->ctx->program_lock);
+
+ cl_free(p->bin); /* Free the blob */
+ for (i = 0; i < p->ker_n; ++i) /* Free the kernels */
+ cl_kernel_delete(p->ker[i]);
+ cl_free(p->ker);
+
+  /* The program belongs to its parent context */
+ cl_context_delete(p->ctx);
+
+ /* Free the program as allocated by the compiler */
+ if (p->opaque) gbe_program_delete(p->opaque);
+
+ p->magic = CL_MAGIC_DEAD_HEADER; /* For safety */
+ cl_free(p);
+}
+
+LOCAL cl_program
+cl_program_new(cl_context ctx)
+{
+ cl_program p = NULL;
+
+ /* Allocate the structure */
+ TRY_ALLOC_NO_ERR (p, CALLOC(struct _cl_program));
+ p->ref_n = 1;
+ p->magic = CL_MAGIC_PROGRAM_HEADER;
+ p->ctx = ctx;
+
+  /* The program also belongs to its context */
+ cl_context_add_ref(ctx);
+
+exit:
+ return p;
+error:
+ cl_program_delete(p);
+ goto exit;
+}
+
+LOCAL void
+cl_program_add_ref(cl_program p)
+{
+ assert(p);
+ atomic_inc(&p->ref_n);
+}
+
+static cl_int
+cl_program_load_gen_program(cl_program p)
+{
+ cl_int err = CL_SUCCESS;
+ uint32_t i;
+
+ assert(p->opaque != NULL);
+ p->ker_n = gbe_program_get_kernel_num(p->opaque);
+
+ /* Allocate the kernel array */
+ TRY_ALLOC (p->ker, CALLOC_ARRAY(cl_kernel, p->ker_n));
+
+ for (i = 0; i < p->ker_n; ++i) {
+ const gbe_kernel opaque = gbe_program_get_kernel(p->opaque, i);
+ assert(opaque != NULL);
+ TRY_ALLOC (p->ker[i], cl_kernel_new(p));
+ cl_kernel_setup(p->ker[i], opaque);
+ }
+
+error:
+ return err;
+}
+
+LOCAL cl_program
+cl_program_create_from_binary(cl_context ctx,
+ cl_uint num_devices,
+ const cl_device_id * devices,
+ const size_t * lengths,
+ const unsigned char ** binaries,
+ cl_int * binary_status,
+ cl_int * errcode_ret)
+{
+#if 0
+ cl_program program = NULL;
+ cl_int err = CL_SUCCESS;
+
+ assert(ctx);
+ INVALID_DEVICE_IF (num_devices != 1);
+ INVALID_DEVICE_IF (devices == NULL);
+ INVALID_DEVICE_IF (devices[0] != ctx->device);
+ INVALID_VALUE_IF (binaries == NULL);
+ INVALID_VALUE_IF (lengths == NULL);
+
+ if (binaries[0] == NULL) {
+ err = CL_INVALID_VALUE;
+ if (binary_status)
+ binary_status[0] = CL_INVALID_VALUE;
+ goto error;
+ }
+
+ if (lengths[0] == 0) {
+ err = CL_INVALID_VALUE;
+ if (binary_status)
+ binary_status[0] = CL_INVALID_VALUE;
+ goto error;
+ }
+
+ // TRY_ALLOC (program, cl_program_new(ctx, (const char *) binaries[0], lengths[0]));
+
+exit:
+ if (errcode_ret)
+ *errcode_ret = err;
+ return program;
+error:
+ cl_program_delete(program);
+ program = NULL;
+ goto exit;
+#endif
+ NOT_IMPLEMENTED;
+  return NULL;
+}
+
+LOCAL cl_program
+cl_program_create_from_llvm(cl_context ctx,
+ cl_uint num_devices,
+ const cl_device_id *devices,
+ const char *file_name,
+ cl_int *errcode_ret)
+{
+ cl_program program = NULL;
+ cl_int err = CL_SUCCESS;
+
+ assert(ctx);
+ INVALID_DEVICE_IF (num_devices != 1);
+ INVALID_DEVICE_IF (devices == NULL);
+ INVALID_DEVICE_IF (devices[0] != ctx->device);
+ INVALID_VALUE_IF (file_name == NULL);
+
+ program = cl_program_new(ctx);
+ program->opaque = gbe_program_new_from_llvm(file_name, 0, NULL, NULL);
+ if (UNLIKELY(program->opaque == NULL)) {
+ err = CL_INVALID_PROGRAM;
+ goto error;
+ }
+
+ /* Create all the kernels */
+ TRY (cl_program_load_gen_program, program);
+ program->source_type = FROM_LLVM;
+
+exit:
+ if (errcode_ret)
+ *errcode_ret = err;
+ return program;
+error:
+ cl_program_delete(program);
+ program = NULL;
+ goto exit;
+}
+
+LOCAL cl_program
+cl_program_create_from_source(cl_context ctx,
+ cl_uint count,
+ const char **strings,
+ const size_t *lengths,
+ cl_int *errcode_ret)
+{
+ cl_program program = NULL;
+ cl_int err = CL_SUCCESS;
+ cl_int i;
+
+ assert(ctx);
+ INVALID_VALUE_IF (count == 0);
+ INVALID_VALUE_IF (strings == NULL);
+
+  // The real compilation step will be done at build time since we do not
+  // yet have the compilation options
+ program = cl_program_new(ctx);
+ TRY_ALLOC (program->sources, cl_calloc(count, sizeof(char*)));
+ for (i = 0; i < (int) count; ++i) {
+ size_t len;
+ if (lengths == NULL || lengths[i] == 0)
+ len = strlen(strings[i]);
+ else
+ len = lengths[i];
+ TRY_ALLOC (program->sources[i], cl_calloc(len+1, sizeof(char)));
+ memcpy(program->sources[i], strings[i], len);
+ program->sources[i][len] = 0;
+ }
+ program->src_n = count;
+ program->source_type = FROM_SOURCE;
+
+exit:
+ if (errcode_ret)
+ *errcode_ret = err;
+ return program;
+error:
+ cl_program_delete(program);
+ program = NULL;
+ goto exit;
+}
+
+LOCAL cl_int
+cl_program_build(cl_program p)
+{
+ cl_int err = CL_SUCCESS;
+
+ if (p->source_type == FROM_SOURCE) {
+ /* XXX support multiple sources later */
+ FATAL_IF (p->src_n != 1, "Only ONE source file supported");
+ p->opaque = gbe_program_new_from_source(p->sources[0], 0, NULL, NULL);
+ if (UNLIKELY(p->opaque == NULL)) {
+ err = CL_INVALID_PROGRAM;
+ goto error;
+ }
+
+ /* Create all the kernels */
+ TRY (cl_program_load_gen_program, p);
+ p->source_type = FROM_LLVM;
+ }
+
+ p->is_built = 1;
+error:
+ return err;
+}
+
+LOCAL cl_kernel
+cl_program_create_kernel(cl_program p, const char *name, cl_int *errcode_ret)
+{
+ cl_kernel from = NULL, to = NULL;
+ cl_int err = CL_SUCCESS;
+ uint32_t i = 0;
+
+ if (UNLIKELY(name == NULL)) {
+ err = CL_INVALID_KERNEL_NAME;
+ goto error;
+ }
+
+  /* Find the kernel first */
+ for (i = 0; i < p->ker_n; ++i) {
+ assert(p->ker[i]);
+ const char *ker_name = cl_kernel_get_name(p->ker[i]);
+ if (strcmp(ker_name, name) == 0) {
+ from = p->ker[i];
+ break;
+ }
+ }
+
+ /* We were not able to find this named kernel */
+ if (UNLIKELY(from == NULL)) {
+ err = CL_INVALID_KERNEL_NAME;
+ goto error;
+ }
+
+ TRY_ALLOC(to, cl_kernel_dup(from));
+
+exit:
+ if (errcode_ret)
+ *errcode_ret = err;
+ return to;
+error:
+ cl_kernel_delete(to);
+ to = NULL;
+ goto exit;
+}
+
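Putting the pieces together, the intended flow through this file is source -> program -> build -> kernel. A sketch using the internal API (the kernel name "my_kernel" and the OpenCL C source are placeholders; not part of the imported file):

    #include "cl_program.h"

    static cl_kernel
    build_example(cl_context ctx, const char *ocl_source)
    {
      cl_int err;
      cl_program p = cl_program_create_from_source(ctx, 1, &ocl_source,
                                                   NULL, &err);
      if (p == NULL || err != CL_SUCCESS)
        return NULL;
      if (cl_program_build(p) != CL_SUCCESS) {  /* compiles the source */
        cl_program_delete(p);
        return NULL;
      }
      /* Duplicate the internal kernel named "my_kernel" for the caller;
       * note the caller still owns one reference on p. */
      return cl_program_create_kernel(p, "my_kernel", &err);
    }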
diff --git a/src/cl_program.h b/src/cl_program.h
new file mode 100644
index 0000000..3838656
--- /dev/null
+++ b/src/cl_program.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __CL_PROGRAM_H__
+#define __CL_PROGRAM_H__
+
+#include "cl_internals.h"
+#include "program.h"
+#include "CL/cl.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+// This is the structure output by the compiler
+struct _gbe_program;
+
+enum {
+ FROM_SOURCE = 0,
+ FROM_LLVM = 1,
+ FROM_BINARY = 2
+};
+
+/* This maps an OCL file containing some kernels */
+struct _cl_program {
+ uint64_t magic; /* To identify it as a program */
+ volatile int ref_n; /* We reference count this object */
+  gbe_program opaque;       /* (Opaque) program as output by the compiler */
+ cl_kernel *ker; /* All kernels included by the OCL file */
+ cl_program prev, next; /* We chain the programs together */
+ cl_context ctx; /* Its parent context */
+ char *bin; /* The program copied verbatim */
+ size_t bin_sz; /* Its size in memory */
+ char **sources; /* Program sources */
+ size_t src_n; /* Number of sources */
+ uint32_t ker_n; /* Number of declared kernels */
+ uint32_t source_type:2; /* Built from binary, source or LLVM */
+ uint32_t is_built:1; /* Did we call clBuildProgram on it? */
+};
+
+/* Create an empty program */
+extern cl_program cl_program_new(cl_context);
+
+/* Destroy and deallocate a program */
+extern void cl_program_delete(cl_program);
+
+/* Add one more reference to the object (to defer its deletion) */
+extern void cl_program_add_ref(cl_program);
+
+/* Create a kernel for the OCL user */
+extern cl_kernel cl_program_create_kernel(cl_program, const char*, cl_int*);
+
+/* Create a program from OCL source */
+extern cl_program
+cl_program_create_from_source(cl_context ctx,
+ cl_uint count,
+ const char **strings,
+ const size_t *lengths,
+ cl_int *errcode_ret);
+
+/* Directly create a program from a blob */
+extern cl_program
+cl_program_create_from_binary(cl_context context,
+ cl_uint num_devices,
+ const cl_device_id * devices,
+ const size_t * lengths,
+ const unsigned char ** binaries,
+ cl_int * binary_status,
+ cl_int * errcode_ret);
+
+/* Directly create a program from a LLVM source file */
+extern cl_program
+cl_program_create_from_llvm(cl_context context,
+ cl_uint num_devices,
+ const cl_device_id * devices,
+ const char * fileName,
+ cl_int * errcode_ret);
+
+/* Build the program as specified by OCL */
+extern cl_int
+cl_program_build(cl_program p);
+
+#endif /* __CL_PROGRAM_H__ */
+
diff --git a/src/cl_sampler.c b/src/cl_sampler.c
new file mode 100644
index 0000000..fd88a77
--- /dev/null
+++ b/src/cl_sampler.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "cl_context.h"
+#include "cl_sampler.h"
+#include "cl_utils.h"
+#include "cl_alloc.h"
+
+#include <assert.h>
+
+LOCAL cl_sampler
+cl_sampler_new(cl_context ctx,
+ cl_bool normalized_coords,
+ cl_addressing_mode address,
+ cl_filter_mode filter,
+ cl_int *errcode_ret)
+{
+ cl_sampler sampler = NULL;
+ cl_int err = CL_SUCCESS;
+
+  /* Allocate and initialize the structure itself */
+ TRY_ALLOC (sampler, CALLOC(struct _cl_sampler));
+ sampler->ref_n = 1;
+ sampler->magic = CL_MAGIC_SAMPLER_HEADER;
+ sampler->normalized_coords = normalized_coords;
+ sampler->address = address;
+ sampler->filter = filter;
+
+ /* Append the sampler in the context sampler list */
+ pthread_mutex_lock(&ctx->sampler_lock);
+ sampler->next = ctx->samplers;
+ if (ctx->samplers != NULL)
+ ctx->samplers->prev = sampler;
+ ctx->samplers = sampler;
+ pthread_mutex_unlock(&ctx->sampler_lock);
+ sampler->ctx = ctx;
+ cl_context_add_ref(ctx);
+
+exit:
+ if (errcode_ret)
+ *errcode_ret = err;
+ return sampler;
+error:
+ cl_sampler_delete(sampler);
+ sampler = NULL;
+ goto exit;
+}
+
+LOCAL void
+cl_sampler_delete(cl_sampler sampler)
+{
+ if (UNLIKELY(sampler == NULL))
+ return;
+ if (atomic_dec(&sampler->ref_n) > 1)
+ return;
+
+ assert(sampler->ctx);
+ pthread_mutex_lock(&sampler->ctx->sampler_lock);
+ if (sampler->prev)
+ sampler->prev->next = sampler->next;
+ if (sampler->next)
+ sampler->next->prev = sampler->prev;
+  if (sampler->ctx->samplers == sampler)
+    sampler->ctx->samplers = sampler->next;
+ pthread_mutex_unlock(&sampler->ctx->sampler_lock);
+ cl_context_delete(sampler->ctx);
+
+ cl_free(sampler);
+}
+
+LOCAL void
+cl_sampler_add_ref(cl_sampler sampler)
+{
+ assert(sampler);
+ atomic_inc(&sampler->ref_n);
+}
+
diff --git a/src/cl_sampler.h b/src/cl_sampler.h
new file mode 100644
index 0000000..800de4c
--- /dev/null
+++ b/src/cl_sampler.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __CL_SAMPLER_H__
+#define __CL_SAMPLER_H__
+
+#include "CL/cl.h"
+#include <stdint.h>
+
+/* How to access images */
+struct _cl_sampler {
+ uint64_t magic; /* To identify it as a sampler object */
+ volatile int ref_n; /* This object is reference counted */
+ cl_sampler prev, next; /* We chain the samplers in the allocator */
+ cl_context ctx; /* Context it belongs to */
+ cl_bool normalized_coords; /* Are coordinates normalized? */
+ cl_addressing_mode address;/* CLAMP / REPEAT and so on... */
+ cl_filter_mode filter; /* LINEAR / NEAREST mostly */
+};
+
+/* Create a new sampler object */
+extern cl_sampler cl_sampler_new(cl_context,
+ cl_bool,
+ cl_addressing_mode,
+ cl_filter_mode,
+ cl_int *err);
+
+/* Unref the object and delete it if no more reference on it */
+extern void cl_sampler_delete(cl_sampler);
+
+/* Add one more reference to this object */
+extern void cl_sampler_add_ref(cl_sampler);
+
+#endif /* __CL_SAMPLER_H__ */
+
diff --git a/src/cl_utils.h b/src/cl_utils.h
new file mode 100644
index 0000000..ee7a317
--- /dev/null
+++ b/src/cl_utils.h
@@ -0,0 +1,256 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __CL_UTILS_H__
+#define __CL_UTILS_H__
+
+/* INLINE is forceinline */
+#define INLINE __attribute__((always_inline)) inline
+
+/* Branch hint */
+#define LIKELY(x) __builtin_expect((x),1)
+#define UNLIKELY(x) __builtin_expect((x),0)
+
+/* Stringify macros */
+#define JOIN(X, Y) _DO_JOIN(X, Y)
+#define _DO_JOIN(X, Y) _DO_JOIN2(X, Y)
+#define _DO_JOIN2(X, Y) X##Y
+
+/* Check compile time errors */
+#define STATIC_ASSERT(value) \
+struct JOIN(__,JOIN(__,__LINE__)) { \
+ int x[(value) ? 1 : -1]; \
+}
+
+/* Throw errors */
+#define ERR(ERROR, ...) \
+do { \
+ fprintf(stderr, "error in %s line %i\n", __FILE__, __LINE__); \
+ fprintf(stderr, __VA_ARGS__); \
+ fprintf(stderr, "\n"); \
+ err = ERROR; \
+ goto error; \
+} while (0)
+
+#define DO_ALLOC_ERR \
+do { \
+ ERR(CL_OUT_OF_HOST_MEMORY, "Out of memory"); \
+} while (0)
+
+#define ERR_IF(COND, ERROR, ...) \
+do { \
+ if (UNLIKELY(COND)) ERR (ERROR, __VA_ARGS__); \
+} while (0)
+
+#define INVALID_VALUE_IF(COND) \
+do { \
+ ERR_IF(COND, CL_INVALID_VALUE, "Invalid value"); \
+} while (0)
+
+#define INVALID_DEVICE_IF(COND) \
+do { \
+ ERR_IF(COND, CL_INVALID_DEVICE, "Invalid device"); \
+} while (0)
+
+#define MAX(x0, x1) ((x0) > (x1) ? (x0) : (x1))
+#define MIN(x0, x1) ((x0) < (x1) ? (x0) : (x1))
+#define ALIGN(A, B) (((A) % (B)) ? (A) + (B) - ((A) % (B)) : (A))
+
+#define DO_ALLOC_ERROR \
+do { \
+ err = CL_OUT_OF_HOST_MEMORY; \
+ goto error; \
+} while (0)
+
+#define FATAL(...) \
+do { \
+ fprintf(stderr, "error: "); \
+ fprintf(stderr, __VA_ARGS__); \
+ fprintf(stderr, "\n"); \
+ assert(0); \
+ exit(-1); \
+} while (0)
+
+#define FATAL_IF(COND, ...) \
+do { \
+ if (UNLIKELY(COND)) FATAL(__VA_ARGS__); \
+} while (0)
+
+#define NOT_IMPLEMENTED FATAL ("Not implemented")
+
+#define CHECK_CONTEXT(CTX) \
+do { \
+ if (UNLIKELY(CTX == NULL)) { \
+ err = CL_INVALID_CONTEXT; \
+ goto error; \
+ } \
+ if (UNLIKELY(CTX->magic != CL_MAGIC_CONTEXT_HEADER)) { \
+ err = CL_INVALID_CONTEXT; \
+ goto error; \
+ } \
+} while (0)
+
+#define CHECK_QUEUE(QUEUE) \
+do { \
+ if (UNLIKELY(QUEUE == NULL)) { \
+ err = CL_INVALID_COMMAND_QUEUE; \
+ goto error; \
+ } \
+ if (UNLIKELY(QUEUE->magic != CL_MAGIC_QUEUE_HEADER)) { \
+ err = CL_INVALID_COMMAND_QUEUE; \
+ goto error; \
+ } \
+} while (0)
+
+#define CHECK_MEM(MEM) \
+do { \
+ if (UNLIKELY(MEM == NULL)) { \
+ err = CL_INVALID_MEM; \
+ goto error; \
+ } \
+ if (UNLIKELY(MEM->magic != CL_MAGIC_MEM_HEADER)) { \
+ err = CL_INVALID_MEM; \
+ goto error; \
+ } \
+} while (0)
+
+#define CHECK_SAMPLER(SAMPLER) \
+do { \
+ if (UNLIKELY(SAMPLER == NULL)) { \
+ err = CL_INVALID_SAMPLER; \
+ goto error; \
+ } \
+ if (UNLIKELY(SAMPLER->magic != CL_MAGIC_SAMPLER_HEADER)) {\
+ err = CL_INVALID_SAMPLER; \
+ goto error; \
+ } \
+} while (0)
+
+#define CHECK_KERNEL(KERNEL) \
+do { \
+ if (UNLIKELY(KERNEL == NULL)) { \
+ err = CL_INVALID_KERNEL; \
+ goto error; \
+ } \
+ if (UNLIKELY(KERNEL->magic != CL_MAGIC_KERNEL_HEADER)) { \
+ err = CL_INVALID_KERNEL; \
+ goto error; \
+ } \
+} while (0)
+
+#define CHECK_PROGRAM(PROGRAM) \
+do { \
+ if (UNLIKELY(PROGRAM == NULL)) { \
+ err = CL_INVALID_PROGRAM; \
+ goto error; \
+ } \
+ if (UNLIKELY(PROGRAM->magic != CL_MAGIC_PROGRAM_HEADER)) {\
+ err = CL_INVALID_PROGRAM; \
+ goto error; \
+ } \
+} while (0)
+
+#define ELEMENTS(x) (sizeof(x)/sizeof(*(x)))
+#define CALLOC_STRUCT(T) (struct T*) cl_calloc(1, sizeof(struct T))
+#define CALLOC(T) (T*) cl_calloc(1, sizeof(T))
+#define CALLOC_ARRAY(T, N) (T*) cl_calloc(N, sizeof(T))
+#define MEMZERO(x) do { memset((x),0,sizeof(*(x))); } while (0)
+
+/* Run some code and catch errors */
+#define TRY(fn,...) \
+do { \
+ if (UNLIKELY((err = fn(__VA_ARGS__)) != CL_SUCCESS)) \
+ goto error; \
+} while (0)
+
+#define TRY_NO_ERR(fn,...) \
+do { \
+ if (UNLIKELY(fn(__VA_ARGS__) != CL_SUCCESS)) \
+ goto error; \
+} while (0)
+
+#define TRY_ALLOC(dst, EXPR) \
+do { \
+ if (UNLIKELY((dst = EXPR) == NULL)) \
+ DO_ALLOC_ERROR; \
+} while (0)
+
+#define TRY_ALLOC_NO_ERR(dst, EXPR) \
+do { \
+ if (UNLIKELY((dst = EXPR) == NULL)) \
+ goto error; \
+} while (0)
+
+#define TRY_ALLOC_NO_RET(EXPR) \
+do { \
+ if (UNLIKELY((EXPR) == NULL)) \
+ DO_ALLOC_ERROR; \
+} while (0)
+
+/* Break Point Definitions */
+#if !defined(NDEBUG)
+
+#define BREAK \
+do { \
+ __asm__("int3"); \
+} while(0)
+
+#define BREAK_IF(value) \
+do { \
+  if (UNLIKELY(!(value))) BREAK; \
+} while(0)
+
+#else
+#define BREAK do { } while(0)
+#define BREAK_IF(value) do { } while(0)
+#define BREAKPOINT() do { } while(0)
+#define ASSERT(value) do { } while(0)
+#endif
+
+/* For all internal functions */
+#define LOCAL __attribute__ ((visibility ("internal")))
+
+/* Align a structure or a variable */
+#define ALIGNED(X) __attribute__ ((aligned (X)))
+
+/* Number of DWORDS */
+#define SIZEOF32(X) (sizeof(X) / sizeof(uint32_t))
+
+/* Memory quantity */
+#define KB 1024
+#define MB (KB*KB)
+
+/* To help bitfield definitions */
+#define BITFIELD_BIT(X) 1
+#define BITFIELD_RANGE(X,Y) ((Y) - (X) + 1)
+
+/* 32 bits atomic variable */
+typedef volatile int atomic_t;
+
+static INLINE int atomic_add(atomic_t *v, const int c) {
+ register int i = c;
+ __asm__ __volatile__("lock ; xaddl %0, %1;"
+ : "+r"(i), "+m"(*v)
+ : "m"(*v), "r"(i));
+ return i;
+}
+
+static INLINE int atomic_inc(atomic_t *v) { return atomic_add(v, 1); }
+static INLINE int atomic_dec(atomic_t *v) { return atomic_add(v, -1); }
+
+#endif /* __CL_UTILS_H__ */
+
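The TRY*/ERR*/CHECK_* macros above all expect a local cl_int err together with exit/error labels. A typical function written against this convention looks like the following sketch (not part of the imported file; cl_calloc/cl_free come from cl_alloc.h and cl_free is assumed to accept NULL, like free()):

    #include "CL/cl.h"
    #include "cl_context.h"
    #include "cl_utils.h"
    #include "cl_alloc.h"

    static cl_int
    do_something(cl_context ctx, void **out)
    {
      cl_int err = CL_SUCCESS;
      void *buf = NULL;

      CHECK_CONTEXT (ctx);                 /* sets err and jumps on a bad context */
      TRY_ALLOC (buf, cl_calloc(1, 256));  /* CL_OUT_OF_HOST_MEMORY on NULL */
      *out = buf;

    exit:
      return err;
    error:
      cl_free(buf);
      goto exit;
    }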
diff --git a/src/intel/Makefile b/src/intel/Makefile
new file mode 100644
index 0000000..c8f77f9
--- /dev/null
+++ b/src/intel/Makefile
@@ -0,0 +1,4 @@
+TOP=../..
+SUBDIRS=.
+
+include $(TOP)/Makefile.shared
diff --git a/src/intel/intel_batchbuffer.c b/src/intel/intel_batchbuffer.c
new file mode 100644
index 0000000..89f8676
--- /dev/null
+++ b/src/intel/intel_batchbuffer.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "intel/intel_batchbuffer.h"
+#include "intel/intel_driver.h"
+#include "cl_alloc.h"
+#include "cl_utils.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+LOCAL void
+intel_batchbuffer_reset(intel_batchbuffer_t *batch, size_t sz)
+{
+ if (batch->buffer != NULL) {
+ dri_bo_unreference(batch->buffer);
+ batch->buffer = NULL;
+ }
+
+ batch->buffer = dri_bo_alloc(batch->intel->bufmgr,
+ "batch buffer",
+ sz,
+ 64);
+ assert(batch->buffer);
+
+ dri_bo_map(batch->buffer, 1);
+ batch->map = (uint8_t*) batch->buffer->virtual;
+ batch->size = sz;
+ batch->ptr = batch->map;
+ batch->atomic = 0;
+}
+
+LOCAL void
+intel_batchbuffer_init(intel_batchbuffer_t *batch, intel_driver_t *intel)
+{
+ assert(intel);
+ batch->intel = intel;
+}
+
+LOCAL void
+intel_batchbuffer_terminate(intel_batchbuffer_t *batch)
+{
+ assert(batch->buffer);
+
+ if (batch->map) {
+ dri_bo_unmap(batch->buffer);
+ batch->map = NULL;
+ }
+
+ dri_bo_unreference(batch->buffer);
+ batch->buffer = NULL;
+}
+
+LOCAL void
+intel_batchbuffer_flush(intel_batchbuffer_t *batch)
+{
+ uint32_t used = batch->ptr - batch->map;
+ int is_locked = batch->intel->locked;
+
+ if (used == 0)
+ return;
+
+ if ((used & 4) == 0) {
+ *(uint32_t*) batch->ptr = 0;
+ batch->ptr += 4;
+ }
+
+ *(uint32_t*)batch->ptr = MI_BATCH_BUFFER_END;
+ batch->ptr += 4;
+ dri_bo_unmap(batch->buffer);
+ used = batch->ptr - batch->map;
+
+ if (!is_locked)
+ intel_driver_lock_hardware(batch->intel);
+
+ dri_bo_exec(batch->buffer, used, 0, 0, 0);
+ if (!is_locked)
+ intel_driver_unlock_hardware(batch->intel);
+
+ // Release the buffer
+ intel_batchbuffer_terminate(batch);
+}
+
+LOCAL void
+intel_batchbuffer_emit_reloc(intel_batchbuffer_t *batch,
+ dri_bo *bo,
+ uint32_t read_domains,
+ uint32_t write_domains,
+ uint32_t delta)
+{
+ assert(batch->ptr - batch->map < batch->size);
+ dri_bo_emit_reloc(batch->buffer,
+ read_domains,
+ write_domains,
+ delta,
+ batch->ptr - batch->map,
+ bo);
+ intel_batchbuffer_emit_dword(batch, bo->offset + delta);
+}
+
+LOCAL void
+intel_batchbuffer_emit_mi_flush(intel_batchbuffer_t *batch)
+{
+ intel_batchbuffer_require_space(batch, 4);
+ intel_batchbuffer_emit_dword(batch, MI_FLUSH | STATE_INSTRUCTION_CACHE_INVALIDATE);
+}
+
+LOCAL intel_batchbuffer_t*
+intel_batchbuffer_new(intel_driver_t *intel)
+{
+ intel_batchbuffer_t *batch = NULL;
+ assert(intel);
+ TRY_ALLOC_NO_ERR (batch, CALLOC(intel_batchbuffer_t));
+ intel_batchbuffer_init(batch, intel);
+
+exit:
+ return batch;
+error:
+ intel_batchbuffer_delete(batch);
+ batch = NULL;
+ goto exit;
+}
+
+LOCAL void
+intel_batchbuffer_delete(intel_batchbuffer_t *batch)
+{
+ if (batch == NULL)
+ return;
+ if(batch->buffer)
+ intel_batchbuffer_terminate(batch);
+ cl_free(batch);
+}
+
diff --git a/src/intel/intel_batchbuffer.h b/src/intel/intel_batchbuffer.h
new file mode 100644
index 0000000..ece8307
--- /dev/null
+++ b/src/intel/intel_batchbuffer.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+#ifndef _INTEL_BATCHBUFFER_H_
+#define _INTEL_BATCHBUFFER_H_
+
+#include "intel_defines.h"
+#include "cl_utils.h"
+
+#include <xf86drm.h>
+#include <drm.h>
+#include <i915_drm.h>
+#include <intel_bufmgr.h>
+#include <stdint.h>
+#include <memory.h>
+#include <assert.h>
+
+#define BEGIN_BATCH(b, n) do { \
+ intel_batchbuffer_require_space(b, (n) * 4); \
+} while (0)
+
+#define OUT_BATCH(b, d) do { \
+ intel_batchbuffer_emit_dword(b, d); \
+} while (0)
+
+#define OUT_RELOC(b, bo, read_domains, write_domain, delta) do { \
+ assert((delta) >= 0); \
+ intel_batchbuffer_emit_reloc(b, bo, read_domains, write_domain, delta); \
+} while (0)
+
+#define ADVANCE_BATCH(b) do { } while (0)
+
+struct intel_driver;
+
+typedef struct intel_batchbuffer
+{
+ struct intel_driver *intel;
+ drm_intel_bo *buffer;
+ uint32_t size;
+ uint8_t *map;
+ uint8_t *ptr;
+ int atomic;
+} intel_batchbuffer_t;
+
+extern intel_batchbuffer_t* intel_batchbuffer_new(struct intel_driver*);
+extern void intel_batchbuffer_delete(intel_batchbuffer_t*);
+extern void intel_batchbuffer_emit_reloc(intel_batchbuffer_t*,
+ drm_intel_bo*,
+ uint32_t read_domains,
+ uint32_t write_domains,
+ uint32_t delta);
+extern void intel_batchbuffer_emit_mi_flush(intel_batchbuffer_t*);
+extern void intel_batchbuffer_init(intel_batchbuffer_t*, struct intel_driver*);
+extern void intel_batchbuffer_terminate(intel_batchbuffer_t*);
+extern void intel_batchbuffer_flush(intel_batchbuffer_t*);
+extern void intel_batchbuffer_reset(intel_batchbuffer_t*, size_t sz);
+
+static INLINE uint32_t
+intel_batchbuffer_space(const intel_batchbuffer_t *batch)
+{
+ assert(batch->ptr);
+ return batch->size - (batch->ptr - batch->map);
+}
+
+static INLINE void
+intel_batchbuffer_emit_dword(intel_batchbuffer_t *batch, uint32_t x)
+{
+ assert(intel_batchbuffer_space(batch) >= 4);
+ *(uint32_t*)batch->ptr = x;
+ batch->ptr += 4;
+}
+
+static INLINE void
+intel_batchbuffer_require_space(intel_batchbuffer_t *batch, uint32_t size) {
+ assert(size < batch->size - 8);
+ if (intel_batchbuffer_space(batch) < size)
+ intel_batchbuffer_space(batch);
+}
+
+static INLINE uint8_t*
+intel_batchbuffer_alloc_space(intel_batchbuffer_t *batch, uint32_t size)
+{
+ assert(intel_batchbuffer_space(batch) >= size);
+ uint8_t *space_ptr = batch->ptr;
+ batch->ptr += size;
+ return space_ptr;
+}
+
+static INLINE void
+intel_batchbuffer_start_atomic(intel_batchbuffer_t *batch, uint32_t size)
+{
+ assert(!batch->atomic);
+ intel_batchbuffer_require_space(batch, size);
+ batch->atomic = 1;
+}
+
+static INLINE void
+intel_batchbuffer_end_atomic(intel_batchbuffer_t *batch)
+{
+ assert(batch->atomic);
+ batch->atomic = 0;
+}
+
+#endif /* _INTEL_BATCHBUFFER_H_ */
+
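In practice the macros and helpers above are used as begin/out/advance triplets, optionally wrapped in an atomic section. A sketch, assuming the batch has already been reset with intel_batchbuffer_reset() and that MI_FLUSH is provided by intel_defines.h:

    #include "intel/intel_batchbuffer.h"

    static void
    emit_flush_example(intel_batchbuffer_t *batch)
    {
      intel_batchbuffer_start_atomic(batch, 16); /* reserve room up front */
      BEGIN_BATCH(batch, 1);
      OUT_BATCH(batch, MI_FLUSH);
      ADVANCE_BATCH(batch);
      intel_batchbuffer_end_atomic(batch);
      /* Pads to a QWORD, appends MI_BATCH_BUFFER_END and submits the batch. */
      intel_batchbuffer_flush(batch);
    }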
diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h
new file mode 100644
index 0000000..fbf4619
--- /dev/null
+++ b/src/intel/intel_defines.h
@@ -0,0 +1,305 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith at tungstengraphics.com>
+ */
+#ifndef __GENX_DEFINES_H__
+#define __GENX_DEFINES_H__
+
+#define CMD(PIPELINE,OP,SUB_OP) ((3 << 29) | \
+ ((PIPELINE) << 27) | \
+ ((OP) << 24) | \
+ ((SUB_OP) << 16))
+
+#define CMD_URB_FENCE CMD(0, 0, 0)
+#define CMD_CS_URB_STATE CMD(0, 0, 1)
+#define CMD_CONSTANT_BUFFER CMD(0, 0, 2)
+#define CMD_STATE_PREFETCH CMD(0, 0, 3)
+#define CMD_MEDIA_GATEWAY_STATE CMD(2, 0, 3)
+#define CMD_MEDIA_STATE_FLUSH CMD(2, 0, 4)
+#define CMD_GPGPU_WALKER CMD(2, 1, 5)
+
+#define CMD_LOAD_REGISTER_IMM (0x22 << 23)
+
+#define CMD_STATE_BASE_ADDRESS CMD(0, 1, 1)
+#define CMD_STATE_SIP CMD(0, 1, 2)
+#define CMD_PIPELINE_SELECT CMD(1, 1, 4)
+#define CMD_SAMPLER_PALETTE_LOAD CMD(3, 1, 2)
+
+#define CMD_MEDIA_STATE_POINTERS CMD(2, 0, 0)
+#define CMD_MEDIA CMD(2, 1, 0)
+#define CMD_MEDIA_EX CMD(2, 1, 1)
+
+#define CMD_PIPELINED_POINTERS CMD(3, 0, 0)
+#define CMD_BINDING_TABLE_POINTERS CMD(3, 0, 1)
+#define CMD_VERTEX_BUFFERS CMD(3, 0, 8)
+#define CMD_VERTEX_ELEMENTS CMD(3, 0, 9)
+#define CMD_DRAWING_RECTANGLE CMD(3, 1, 0)
+#define CMD_CONSTANT_COLOR CMD(3, 1, 1)
+#define CMD_3DPRIMITIVE CMD(3, 3, 0)
+
+#define BASE_ADDRESS_MODIFY (1 << 0)
+
+#define PIPELINE_SELECT_3D 0
+#define PIPELINE_SELECT_MEDIA 1
+
+#define UF0_CS_REALLOC (1 << 13)
+#define UF0_VFE_REALLOC (1 << 12)
+#define UF0_SF_REALLOC (1 << 11)
+#define UF0_CLIP_REALLOC (1 << 10)
+#define UF0_GS_REALLOC (1 << 9)
+#define UF0_VS_REALLOC (1 << 8)
+#define UF1_CLIP_FENCE_SHIFT 20
+#define UF1_GS_FENCE_SHIFT 10
+#define UF1_VS_FENCE_SHIFT 0
+#define UF2_CS_FENCE_SHIFT 20
+#define UF2_VFE_FENCE_SHIFT 10
+#define UF2_SF_FENCE_SHIFT 0
+
+#define FLOATING_POINT_IEEE_754 0
+#define FLOATING_POINT_NON_IEEE_754 1
+
+#define I965_SURFACE_1D 0
+#define I965_SURFACE_2D 1
+#define I965_SURFACE_3D 2
+#define I965_SURFACE_CUBE 3
+#define I965_SURFACE_BUFFER 4
+#define I965_SURFACE_NULL 7
+
+#define I965_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
+#define I965_SURFACEFORMAT_R32G32B32A32_SINT 0x001
+#define I965_SURFACEFORMAT_R32G32B32A32_UINT 0x002
+#define I965_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
+#define I965_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
+#define I965_SURFACEFORMAT_R64G64_FLOAT 0x005
+#define I965_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
+#define I965_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
+#define I965_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
+#define I965_SURFACEFORMAT_R32G32B32_FLOAT 0x040
+#define I965_SURFACEFORMAT_R32G32B32_SINT 0x041
+#define I965_SURFACEFORMAT_R32G32B32_UINT 0x042
+#define I965_SURFACEFORMAT_R32G32B32_UNORM 0x043
+#define I965_SURFACEFORMAT_R32G32B32_SNORM 0x044
+#define I965_SURFACEFORMAT_R32G32B32_SSCALED 0x045
+#define I965_SURFACEFORMAT_R32G32B32_USCALED 0x046
+#define I965_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
+#define I965_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
+#define I965_SURFACEFORMAT_R16G16B16A16_SINT 0x082
+#define I965_SURFACEFORMAT_R16G16B16A16_UINT 0x083
+#define I965_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
+#define I965_SURFACEFORMAT_R32G32_FLOAT 0x085
+#define I965_SURFACEFORMAT_R32G32_SINT 0x086
+#define I965_SURFACEFORMAT_R32G32_UINT 0x087
+#define I965_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
+#define I965_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
+#define I965_SURFACEFORMAT_L32A32_FLOAT 0x08A
+#define I965_SURFACEFORMAT_R32G32_UNORM 0x08B
+#define I965_SURFACEFORMAT_R32G32_SNORM 0x08C
+#define I965_SURFACEFORMAT_R64_FLOAT 0x08D
+#define I965_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
+#define I965_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
+#define I965_SURFACEFORMAT_A32X32_FLOAT 0x090
+#define I965_SURFACEFORMAT_L32X32_FLOAT 0x091
+#define I965_SURFACEFORMAT_I32X32_FLOAT 0x092
+#define I965_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
+#define I965_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
+#define I965_SURFACEFORMAT_R32G32_SSCALED 0x095
+#define I965_SURFACEFORMAT_R32G32_USCALED 0x096
+#define I965_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
+#define I965_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
+#define I965_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
+#define I965_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
+#define I965_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
+#define I965_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
+#define I965_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
+#define I965_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
+#define I965_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
+#define I965_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
+#define I965_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
+#define I965_SURFACEFORMAT_R16G16_UNORM 0x0CC
+#define I965_SURFACEFORMAT_R16G16_SNORM 0x0CD
+#define I965_SURFACEFORMAT_R16G16_SINT 0x0CE
+#define I965_SURFACEFORMAT_R16G16_UINT 0x0CF
+#define I965_SURFACEFORMAT_R16G16_FLOAT 0x0D0
+#define I965_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
+#define I965_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
+#define I965_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
+#define I965_SURFACEFORMAT_R32_SINT 0x0D6
+#define I965_SURFACEFORMAT_R32_UINT 0x0D7
+#define I965_SURFACEFORMAT_R32_FLOAT 0x0D8
+#define I965_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
+#define I965_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
+#define I965_SURFACEFORMAT_L16A16_UNORM 0x0DF
+#define I965_SURFACEFORMAT_I24X8_UNORM 0x0E0
+#define I965_SURFACEFORMAT_L24X8_UNORM 0x0E1
+#define I965_SURFACEFORMAT_A24X8_UNORM 0x0E2
+#define I965_SURFACEFORMAT_I32_FLOAT 0x0E3
+#define I965_SURFACEFORMAT_L32_FLOAT 0x0E4
+#define I965_SURFACEFORMAT_A32_FLOAT 0x0E5
+#define I965_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
+#define I965_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
+#define I965_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
+#define I965_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
+#define I965_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
+#define I965_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
+#define I965_SURFACEFORMAT_L16A16_FLOAT 0x0F0
+#define I965_SURFACEFORMAT_R32_UNORM 0x0F1
+#define I965_SURFACEFORMAT_R32_SNORM 0x0F2
+#define I965_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
+#define I965_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
+#define I965_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
+#define I965_SURFACEFORMAT_R16G16_SSCALED 0x0F6
+#define I965_SURFACEFORMAT_R16G16_USCALED 0x0F7
+#define I965_SURFACEFORMAT_R32_SSCALED 0x0F8
+#define I965_SURFACEFORMAT_R32_USCALED 0x0F9
+#define I965_SURFACEFORMAT_B5G6R5_UNORM 0x100
+#define I965_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
+#define I965_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
+#define I965_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
+#define I965_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
+#define I965_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
+#define I965_SURFACEFORMAT_R8G8_UNORM 0x106
+#define I965_SURFACEFORMAT_R8G8_SNORM 0x107
+#define I965_SURFACEFORMAT_R8G8_SINT 0x108
+#define I965_SURFACEFORMAT_R8G8_UINT 0x109
+#define I965_SURFACEFORMAT_R16_UNORM 0x10A
+#define I965_SURFACEFORMAT_R16_SNORM 0x10B
+#define I965_SURFACEFORMAT_R16_SINT 0x10C
+#define I965_SURFACEFORMAT_R16_UINT 0x10D
+#define I965_SURFACEFORMAT_R16_FLOAT 0x10E
+#define I965_SURFACEFORMAT_I16_UNORM 0x111
+#define I965_SURFACEFORMAT_L16_UNORM 0x112
+#define I965_SURFACEFORMAT_A16_UNORM 0x113
+#define I965_SURFACEFORMAT_L8A8_UNORM 0x114
+#define I965_SURFACEFORMAT_I16_FLOAT 0x115
+#define I965_SURFACEFORMAT_L16_FLOAT 0x116
+#define I965_SURFACEFORMAT_A16_FLOAT 0x117
+#define I965_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
+#define I965_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
+#define I965_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
+#define I965_SURFACEFORMAT_R8G8_SSCALED 0x11C
+#define I965_SURFACEFORMAT_R8G8_USCALED 0x11D
+#define I965_SURFACEFORMAT_R16_SSCALED 0x11E
+#define I965_SURFACEFORMAT_R16_USCALED 0x11F
+#define I965_SURFACEFORMAT_R8_UNORM 0x140
+#define I965_SURFACEFORMAT_R8_SNORM 0x141
+#define I965_SURFACEFORMAT_R8_SINT 0x142
+#define I965_SURFACEFORMAT_R8_UINT 0x143
+#define I965_SURFACEFORMAT_A8_UNORM 0x144
+#define I965_SURFACEFORMAT_I8_UNORM 0x145
+#define I965_SURFACEFORMAT_L8_UNORM 0x146
+#define I965_SURFACEFORMAT_P4A4_UNORM 0x147
+#define I965_SURFACEFORMAT_A4P4_UNORM 0x148
+#define I965_SURFACEFORMAT_R8_SSCALED 0x149
+#define I965_SURFACEFORMAT_R8_USCALED 0x14A
+#define I965_SURFACEFORMAT_R1_UINT 0x181
+#define I965_SURFACEFORMAT_YCRCB_NORMAL 0x182
+#define I965_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
+#define I965_SURFACEFORMAT_BC1_UNORM 0x186
+#define I965_SURFACEFORMAT_BC2_UNORM 0x187
+#define I965_SURFACEFORMAT_BC3_UNORM 0x188
+#define I965_SURFACEFORMAT_BC4_UNORM 0x189
+#define I965_SURFACEFORMAT_BC5_UNORM 0x18A
+#define I965_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
+#define I965_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
+#define I965_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
+#define I965_SURFACEFORMAT_MONO8 0x18E
+#define I965_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
+#define I965_SURFACEFORMAT_YCRCB_SWAPY 0x190
+#define I965_SURFACEFORMAT_DXT1_RGB 0x191
+#define I965_SURFACEFORMAT_FXT1 0x192
+#define I965_SURFACEFORMAT_R8G8B8_UNORM 0x193
+#define I965_SURFACEFORMAT_R8G8B8_SNORM 0x194
+#define I965_SURFACEFORMAT_R8G8B8_SSCALED 0x195
+#define I965_SURFACEFORMAT_R8G8B8_USCALED 0x196
+#define I965_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
+#define I965_SURFACEFORMAT_R64G64B64_FLOAT 0x198
+#define I965_SURFACEFORMAT_BC4_SNORM 0x199
+#define I965_SURFACEFORMAT_BC5_SNORM 0x19A
+#define I965_SURFACEFORMAT_R16G16B16_UNORM 0x19C
+#define I965_SURFACEFORMAT_R16G16B16_SNORM 0x19D
+#define I965_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
+#define I965_SURFACEFORMAT_R16G16B16_USCALED 0x19F
+#define I965_SURFACEFORMAT_RAW 0x1FF
+
+#define I965_MAPFILTER_NEAREST 0x0
+#define I965_MAPFILTER_LINEAR 0x1
+#define I965_MAPFILTER_ANISOTROPIC 0x2
+
+#define I965_MIPFILTER_NONE 0
+#define I965_MIPFILTER_NEAREST 1
+#define I965_MIPFILTER_LINEAR 3
+
+#define I965_TEXCOORDMODE_WRAP 0
+#define I965_TEXCOORDMODE_MIRROR 1
+#define I965_TEXCOORDMODE_CLAMP 2
+#define I965_TEXCOORDMODE_CUBE 3
+#define I965_TEXCOORDMODE_CLAMP_BORDER 4
+#define I965_TEXCOORDMODE_MIRROR_ONCE 5
+
+#define I965_SURFACERETURNFORMAT_FLOAT32 0
+#define I965_SURFACERETURNFORMAT_S1 1
+
+#define I965_TILEWALK_XMAJOR 0
+#define I965_TILEWALK_YMAJOR 1
+
+#define URB_SIZE(intel) (IS_IGDNG(intel->device_id) ? 1024 : \
+ IS_G4X(intel->device_id) ? 384 : 256)
+
+// L3 cache stuff
+#define GEN7_L3_CNTL_REG2_ADDRESS_OFFSET (0xB020)
+#define GEN7_L3_CNTL_REG3_ADDRESS_OFFSET (0xB024)
+
+// To issue pipe controls (reset L3 / SLM or stall)
+#define GEN7_PIPE_CONTROL_MEDIA 0x2
+#define GEN7_PIPE_CONTROL_3D 0x3
+#define GEN7_PIPE_CONTROL_INSTRUCTION_GFX 0x3
+#define GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL 0x2
+#define GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL 0x0
+
+#endif /* __GENX_DEFINES_H__ */
+
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
new file mode 100644
index 0000000..0c79713
--- /dev/null
+++ b/src/intel/intel_driver.c
@@ -0,0 +1,399 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/*
+ * Copyright 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Xiang Haihao <haihao.xiang at intel.com>
+ * Zou Nan hai <nanhai.zou at intel.com>
+ *
+ */
+#include "intel_driver.h"
+#include "intel_gpgpu.h"
+#include "intel_batchbuffer.h"
+#include "x11/dricommon.h"
+
+#include <assert.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <xf86drm.h>
+#include <stdio.h>
+
+#include "cl_utils.h"
+#include "cl_alloc.h"
+#include "cl_driver.h"
+
+#define SET_BLOCKED_SIGSET(DRIVER) do { \
+ sigset_t bl_mask; \
+ sigfillset(&bl_mask); \
+ sigdelset(&bl_mask, SIGFPE); \
+ sigdelset(&bl_mask, SIGILL); \
+ sigdelset(&bl_mask, SIGSEGV); \
+ sigdelset(&bl_mask, SIGBUS); \
+ sigdelset(&bl_mask, SIGKILL); \
+ pthread_sigmask(SIG_SETMASK, &bl_mask, &(DRIVER)->sa_mask); \
+} while (0)
+
+#define RESTORE_BLOCKED_SIGSET(DRIVER) do { \
+ pthread_sigmask(SIG_SETMASK, &(DRIVER)->sa_mask, NULL); \
+} while (0)
+
+#define PPTHREAD_MUTEX_LOCK(DRIVER) do { \
+ SET_BLOCKED_SIGSET(DRIVER); \
+ pthread_mutex_lock(&(DRIVER)->ctxmutex); \
+} while (0)
+
+#define PPTHREAD_MUTEX_UNLOCK(DRIVER) do { \
+ pthread_mutex_unlock(&(DRIVER)->ctxmutex); \
+ RESTORE_BLOCKED_SIGSET(DRIVER); \
+} while (0)
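+
+/* Note (descriptive): the two macros above take the context mutex with every
+ * signal except SIGFPE, SIGILL, SIGSEGV, SIGBUS and SIGKILL blocked, so an
+ * asynchronous signal handler cannot interrupt the locked section; the
+ * previous signal mask is restored on unlock.
+ */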
+
+static void
+intel_driver_delete(intel_driver_t *driver)
+{
+ if (driver == NULL)
+ return;
+ if (driver->bufmgr)
+ drm_intel_bufmgr_destroy(driver->bufmgr);
+ cl_free(driver);
+}
+
+static intel_driver_t*
+intel_driver_new(void)
+{
+ intel_driver_t *driver = NULL;
+
+ TRY_ALLOC_NO_ERR (driver, CALLOC(intel_driver_t));
+ driver->fd = -1;
+
+exit:
+ return driver;
+error:
+ intel_driver_delete(driver);
+ driver = NULL;
+ goto exit;
+}
+
+/* just used for maximum relocation number in drm_intel */
+#define BATCH_SIZE 0x1000
+
+static void
+intel_driver_memman_init(intel_driver_t *driver)
+{
+ driver->bufmgr = drm_intel_bufmgr_gem_init(driver->fd, BATCH_SIZE);
+ assert(driver->bufmgr);
+ drm_intel_bufmgr_gem_enable_reuse(driver->bufmgr);
+}
+
+static void
+intel_driver_init(intel_driver_t *driver, int dev_fd)
+{
+ driver->fd = dev_fd;
+ driver->locked = 0;
+ pthread_mutex_init(&driver->ctxmutex, NULL);
+#ifndef NDEBUG
+ int res =
+#endif /* NDEBUG */
+ intel_driver_get_param(driver, I915_PARAM_CHIPSET_ID, &driver->device_id);
+ assert(res);
+ intel_driver_memman_init(driver);
+
+#if EMULATE_GEN
+ driver->gen_ver = EMULATE_GEN;
+ if (EMULATE_GEN == 75)
+ driver->device_id = PCI_CHIP_HASWELL_L; /* we pick L for HSW */
+ else if (EMULATE_GEN == 7)
+ driver->device_id = PCI_CHIP_IVYBRIDGE_GT2; /* we pick GT2 for IVB */
+ else if (EMULATE_GEN == 6)
+ driver->device_id = PCI_CHIP_SANDYBRIDGE_GT2; /* we pick GT2 for SNB */
+ else
+ FATAL ("Unsupported Gen for emulation");
+#else
+ if (IS_GEN75(driver->device_id))
+ driver->gen_ver = 75;
+ else if (IS_GEN7(driver->device_id))
+ driver->gen_ver = 7;
+ else if (IS_GEN6(driver->device_id))
+ driver->gen_ver = 6;
+ else if(IS_IGDNG(driver->device_id))
+ driver->gen_ver = 5;
+ else
+ driver->gen_ver = 4;
+#endif /* EMULATE_GEN */
+}
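+
+/* The gen_ver values set above map to hardware generations the same way the
+ * emulation branch does: 75 = Haswell, 7 = Ivy Bridge, 6 = Sandy Bridge,
+ * 5 = IGDNG (Ironlake); anything older falls back to 4.
+ */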
+
+static void
+intel_driver_open(intel_driver_t *intel)
+{
+ int cardi;
+ intel->x11_display = XOpenDisplay(":0.0");
+
+ if(intel->x11_display) {
+ if((intel->dri_ctx = getDRI2State(intel->x11_display,
+ DefaultScreen(intel->x11_display),
+ NULL)))
+ intel_driver_init_shared(intel, intel->dri_ctx);
+ else
+ printf("X server found. dri2 connection failed! \n");
+ } else {
+ printf("Can't find X server!\n");
+ }
+
+ if(!intel_driver_is_active(intel)) {
+ printf("Trying to open directly...");
+ char card_name[20];
+ for(cardi = 0; cardi < 16; cardi++) {
+ sprintf(card_name, "/dev/dri/card%d", cardi);
+ if(intel_driver_init_master(intel, card_name)) {
+ printf("Success at %s.\n", card_name);
+ break;
+ }
+ }
+ }
+ if(!intel_driver_is_active(intel)) {
+ printf("Device open failed\n");
+ exit(-1);
+ }
+}
+
+static void
+intel_driver_close(intel_driver_t *intel)
+{
+ if(intel->dri_ctx) dri_state_release(intel->dri_ctx);
+ if(intel->x11_display) XCloseDisplay(intel->x11_display);
+ if(intel->fd) close(intel->fd);
+ intel->dri_ctx = NULL;
+ intel->x11_display = NULL;
+ intel->fd = 0;
+}
+
+LOCAL int
+intel_driver_get_param(intel_driver_t *driver, int param, int *value)
+{
+ int ret;
+ struct drm_i915_getparam gp;
+
+ memset(&gp, 0, sizeof(struct drm_i915_getparam));
+ gp.param = param;
+ gp.value = value;
+
+ ret = drmCommandWriteRead(driver->fd, DRM_I915_GETPARAM, &gp, sizeof(gp));
+ return ret == 0;
+}
+
+LOCAL int
+intel_driver_is_active(intel_driver_t *driver) {
+ return driver->fd >= 0;
+}
+
+LOCAL int
+intel_driver_init_shared(intel_driver_t *driver, dri_state_t *state)
+{
+ assert(state);
+ if(state->driConnectedFlag != DRI2)
+ return 0;
+ intel_driver_init(driver, state->fd);
+ driver->master = 0;
+ return 1;
+}
+
+LOCAL int
+intel_driver_init_master(intel_driver_t *driver, const char* dev_name)
+{
+ int dev_fd;
+
+ drm_client_t client;
+
+ // usually dev_name = "/dev/dri/card%d"
+ dev_fd = open(dev_name, O_RDWR);
+ if (dev_fd == -1) return 0;
+
+ // Check that we're authenticated and the only opener
+ memset(&client, 0, sizeof(drm_client_t));
+ int ret = ioctl(dev_fd, DRM_IOCTL_GET_CLIENT, &client);
+ assert (ret == 0);
+
+ if (!client.auth) {
+ close(dev_fd);
+ return 0;
+ }
+
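+ // Query client slot 1: when we are the only opener this ioctl is expected to
+ // fail with EINVAL; any other outcome means another client already has the
+ // device open, so we refuse to take master mode on it.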
+ client.idx = 1;
+ ret = ioctl(dev_fd, DRM_IOCTL_GET_CLIENT, &client);
+ if (ret != -1 || errno != EINVAL) {
+ close(dev_fd);
+ return 0;
+ }
+
+ intel_driver_init(driver, dev_fd);
+ driver->master = 1;
+
+ return 1;
+}
+
+LOCAL int
+intel_driver_terminate(intel_driver_t *driver)
+{
+ pthread_mutex_destroy(&driver->ctxmutex);
+
+ if(driver->master)
+ close(driver->fd);
+ driver->fd = -1;
+ return 1;
+}
+
+LOCAL void
+intel_driver_lock_hardware(intel_driver_t *driver)
+{
+
+ PPTHREAD_MUTEX_LOCK(driver);
+ assert(!driver->locked);
+ driver->locked = 1;
+}
+
+LOCAL void
+intel_driver_unlock_hardware(intel_driver_t *driver)
+{
+ driver->locked = 0;
+ PPTHREAD_MUTEX_UNLOCK(driver);
+}
+
+LOCAL dri_bo*
+intel_driver_share_buffer(intel_driver_t *driver, uint32_t name)
+{
+ assert(!driver->master);
+ dri_bo *bo = intel_bo_gem_create_from_name(driver->bufmgr,
+ "rendering buffer",
+ name);
+ return bo;
+}
+
+LOCAL uint32_t
+intel_driver_shared_name(intel_driver_t *driver, dri_bo *bo)
+{
+ uint32_t name;
+ assert(!driver->master);
+ assert(bo);
+ dri_bo_flink(bo, &name);
+ return name;
+}
+
+static int
+intel_get_device_id(void)
+{
+ intel_driver_t *driver = NULL;
+ int intel_device_id;
+
+ driver = intel_driver_new();
+ assert(driver != NULL);
+ intel_driver_open(driver);
+ intel_device_id = driver->device_id;
+ intel_driver_close(driver);
+ intel_driver_terminate(driver);
+ intel_driver_delete(driver);
+
+ return intel_device_id;
+}
+
+static void
+cl_intel_driver_delete(intel_driver_t *driver)
+{
+ if (driver == NULL)
+ return;
+ intel_driver_close(driver);
+ intel_driver_terminate(driver);
+ intel_driver_delete(driver);
+}
+
+static intel_driver_t*
+cl_intel_driver_new(void)
+{
+ intel_driver_t *driver = NULL;
+ TRY_ALLOC_NO_ERR (driver, intel_driver_new());
+ intel_driver_open(driver);
+
+exit:
+ return driver;
+error:
+ cl_intel_driver_delete(driver);
+ driver = NULL;
+ goto exit;
+}
+
+static drm_intel_bufmgr*
+intel_driver_get_bufmgr(intel_driver_t *drv)
+{
+ return drv->bufmgr;
+}
+
+static uint32_t
+intel_driver_get_ver(struct intel_driver *drv)
+{
+ return drv->gen_ver;
+}
+
+static size_t drm_intel_bo_get_size(drm_intel_bo *bo) { return bo->size; }
+static void* drm_intel_bo_get_virtual(drm_intel_bo *bo) { return bo->virtual; }
+
+LOCAL void
+intel_setup_callbacks(void)
+{
+ cl_driver_new = (cl_driver_new_cb *) cl_intel_driver_new;
+ cl_driver_delete = (cl_driver_delete_cb *) cl_intel_driver_delete;
+ cl_driver_get_ver = (cl_driver_get_ver_cb *) intel_driver_get_ver;
+ cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr;
+ cl_driver_get_device_id = (cl_driver_get_device_id_cb *) intel_get_device_id;
+ cl_buffer_alloc = (cl_buffer_alloc_cb *) drm_intel_bo_alloc;
+ cl_buffer_reference = (cl_buffer_reference_cb *) drm_intel_bo_reference;
+ cl_buffer_unreference = (cl_buffer_unreference_cb *) drm_intel_bo_unreference;
+ cl_buffer_map = (cl_buffer_map_cb *) drm_intel_bo_map;
+ cl_buffer_unmap = (cl_buffer_unmap_cb *) drm_intel_bo_unmap;
+ cl_buffer_get_virtual = (cl_buffer_get_virtual_cb *) drm_intel_bo_get_virtual;
+ cl_buffer_get_size = (cl_buffer_get_size_cb *) drm_intel_bo_get_size;
+ cl_buffer_pin = (cl_buffer_pin_cb *) drm_intel_bo_pin;
+ cl_buffer_unpin = (cl_buffer_unpin_cb *) drm_intel_bo_unpin;
+ cl_buffer_subdata = (cl_buffer_subdata_cb *) drm_intel_bo_subdata;
+ cl_buffer_wait_rendering = (cl_buffer_wait_rendering_cb *) drm_intel_bo_wait_rendering;
+ intel_set_gpgpu_callbacks();
+}
+
diff --git a/src/intel/intel_driver.h b/src/intel/intel_driver.h
new file mode 100644
index 0000000..8042059
--- /dev/null
+++ b/src/intel/intel_driver.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/*
+ * Copyright 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef _INTEL_DRIVER_H_
+#define _INTEL_DRIVER_H_
+
+#include "cl_device_data.h"
+
+#include <stdint.h>
+#include <pthread.h>
+#include <signal.h>
+
+#include <xf86drm.h>
+#include <drm.h>
+#include <i915_drm.h>
+#include <intel_bufmgr.h>
+
+#define CMD_MI (0x0 << 29)
+#define CMD_2D (0x2 << 29)
+
+#define MI_NOOP (CMD_MI | 0)
+#define MI_BATCH_BUFFER_END (CMD_MI | (0xA << 23))
+#define MI_FLUSH (CMD_MI | (0x4 << 23))
+#define STATE_INSTRUCTION_CACHE_INVALIDATE (0x1 << 0)
+
+#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 0x04)
+#define XY_COLOR_BLT_WRITE_ALPHA (1 << 21)
+#define XY_COLOR_BLT_WRITE_RGB (1 << 20)
+#define XY_COLOR_BLT_DST_TILED (1 << 11)
+
+/* BR13 */
+#define BR13_565 (0x1 << 24)
+#define BR13_8888 (0x3 << 24)
+
+struct dri_state;
+typedef struct _XDisplay Display;
+
+typedef struct intel_driver
+{
+ dri_bufmgr *bufmgr;
+ int fd;
+ int device_id;
+ int gen_ver;
+ sigset_t sa_mask;
+ pthread_mutex_t ctxmutex;
+ int locked;
+ int master;
+ Display *x11_display;
+ struct dri_state *dri_ctx;
+} intel_driver_t;
+
+/* device control */
+extern void intel_driver_lock_hardware(intel_driver_t*);
+extern void intel_driver_unlock_hardware(intel_driver_t*);
+
+/* methods working in shared mode */
+extern dri_bo* intel_driver_share_buffer(intel_driver_t*, uint32_t name);
+extern uint32_t intel_driver_shared_name(intel_driver_t*, dri_bo*);
+
+/* init driver shared with X using dri state, acquired from X Display */
+extern int intel_driver_init_shared(intel_driver_t*, struct dri_state*);
+
+/* init driver in master mode (when X is not using the card)
+ * usually dev_name = "/dev/dri/card0"
+ */
+extern int intel_driver_init_master(intel_driver_t*, const char* dev_name);
+
+/* terminate driver and all underlying structures */
+extern int intel_driver_terminate(intel_driver_t*);
+
+/* simple check if driver was initialized (checking fd should suffice) */
+extern int intel_driver_is_active(intel_driver_t*);
+
+/* query device parameters using driver ioctl */
+extern int intel_driver_get_param(intel_driver_t*, int param, int *value);
+
+/* init the callbacks used by the OCL driver */
+extern void intel_setup_callbacks(void);
+
+#endif /* _INTEL_DRIVER_H_ */
+
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
new file mode 100644
index 0000000..f48949a
--- /dev/null
+++ b/src/intel/intel_gpgpu.c
@@ -0,0 +1,668 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ * Alexei Soupikov <alexei.soupikov at intel.com>
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stddef.h>
+
+#include "intel/intel_gpgpu.h"
+#include "intel/intel_defines.h"
+#include "intel/intel_structs.h"
+#include "intel/intel_batchbuffer.h"
+#include "intel/intel_driver.h"
+
+#include "cl_alloc.h"
+#include "cl_utils.h"
+
+#define GEN_CMD_MEDIA_OBJECT (0x71000000)
+#define MO_TS_BIT (1 << 24)
+#define MO_RETAIN_BIT (1 << 28)
+#define SAMPLER_STATE_SIZE (16)
+
+/* Stores both binding tables and surface states */
+typedef struct surface_heap {
+ uint32_t binding_table[256];
+ char surface[256][sizeof(gen6_surface_state_t)];
+} surface_heap_t;
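+
+/* Each binding_table[i] holds the byte offset, relative to the surface state
+ * base address, of the matching surface[i] slot; the offsets are filled in by
+ * intel_gpgpu_set_buf_reloc_gen7() and intel_gpgpu_map_address_space() below.
+ */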
+
+#define MAX_IF_DESC 32
+
+/* We can bind only a limited number of buffers */
+enum { max_buf_n = 128 };
+
+/* Handle GPGPU state */
+struct intel_gpgpu
+{
+ intel_driver_t *drv;
+ intel_batchbuffer_t *batch;
+ cl_gpgpu_kernel *ker;
+ drm_intel_bo *binded_buf[max_buf_n]; /* all buffers bound for the call */
+ uint32_t binded_offset[max_buf_n]; /* their offsets in the constant buffer */
+ uint32_t binded_n; /* number of buffers bound */
+
+ struct { drm_intel_bo *bo; } stack_b;
+ struct { drm_intel_bo *bo; } idrt_b;
+ struct { drm_intel_bo *bo; } surface_heap_b;
+ struct { drm_intel_bo *bo; } vfe_state_b;
+ struct { drm_intel_bo *bo; } curbe_b;
+ struct { drm_intel_bo *bo; } sampler_state_b;
+ struct { drm_intel_bo *bo; } perf_b;
+
+ struct {
+ uint32_t num_cs_entries;
+ uint32_t size_cs_entry; /* size of one entry in 512-bit units */
+ } urb;
+
+ uint32_t max_threads; /* max threads requested by the user */
+};
+
+typedef struct intel_gpgpu intel_gpgpu_t;
+
+static void
+intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
+{
+ if (gpgpu == NULL)
+ return;
+ if (gpgpu->surface_heap_b.bo)
+ drm_intel_bo_unreference(gpgpu->surface_heap_b.bo);
+ if (gpgpu->idrt_b.bo)
+ drm_intel_bo_unreference(gpgpu->idrt_b.bo);
+ if (gpgpu->vfe_state_b.bo)
+ drm_intel_bo_unreference(gpgpu->vfe_state_b.bo);
+ if (gpgpu->curbe_b.bo)
+ drm_intel_bo_unreference(gpgpu->curbe_b.bo);
+ if (gpgpu->sampler_state_b.bo)
+ drm_intel_bo_unreference(gpgpu->sampler_state_b.bo);
+ if (gpgpu->perf_b.bo)
+ drm_intel_bo_unreference(gpgpu->perf_b.bo);
+ if (gpgpu->stack_b.bo)
+ drm_intel_bo_unreference(gpgpu->stack_b.bo);
+ intel_batchbuffer_delete(gpgpu->batch);
+ cl_free(gpgpu);
+}
+
+static intel_gpgpu_t*
+intel_gpgpu_new(intel_driver_t *drv)
+{
+ intel_gpgpu_t *state = NULL;
+
+ TRY_ALLOC_NO_ERR (state, CALLOC(intel_gpgpu_t));
+ state->drv = drv;
+ state->batch = intel_batchbuffer_new(state->drv);
+ assert(state->batch);
+ intel_batchbuffer_init(state->batch, state->drv);
+
+exit:
+ return state;
+error:
+ intel_gpgpu_delete(state);
+ state = NULL;
+ goto exit;
+}
+
+static void
+intel_gpgpu_select_pipeline(intel_gpgpu_t *gpgpu)
+{
+ BEGIN_BATCH(gpgpu->batch, 1);
+ OUT_BATCH(gpgpu->batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
+ ADVANCE_BATCH(gpgpu->batch);
+}
+
+static void
+intel_gpgpu_set_base_address(intel_gpgpu_t *gpgpu)
+{
+ const uint32_t def_cc = cc_llc_l3; /* default Cache Control value */
+ BEGIN_BATCH(gpgpu->batch, 10);
+ OUT_BATCH(gpgpu->batch, CMD_STATE_BASE_ADDRESS | 8);
+ /* 0, Gen State Mem Obj CC, Stateless Mem Obj CC, Stateless Access Write Back */
+ OUT_BATCH(gpgpu->batch, 0 | (def_cc << 8) | (def_cc << 4) | (0 << 3)| BASE_ADDRESS_MODIFY); /* General State Base Addr */
+ /* 0, State Mem Obj CC */
+ /* We use a state base address for the surface heap since IVB clamps the
+ * binding table pointer to 11 bits, so we cannot use absolute pointers
+ * while using the surface heap.
+ */
+ OUT_RELOC(gpgpu->batch, gpgpu->surface_heap_b.bo,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ 0 | (def_cc << 8) | (def_cc << 4) | (0 << 3)| BASE_ADDRESS_MODIFY);
+ OUT_BATCH(gpgpu->batch, 0 | (def_cc << 8) | BASE_ADDRESS_MODIFY); /* Dynamic State Base Addr */
+ OUT_BATCH(gpgpu->batch, 0 | (def_cc << 8) | BASE_ADDRESS_MODIFY); /* Indirect Obj Base Addr */
+ OUT_BATCH(gpgpu->batch, 0 | (def_cc << 8) | BASE_ADDRESS_MODIFY); /* Instruction Base Addr */
+ /* If we output an AUB file, we limit the total size to 64MB */
+#if USE_FULSIM
+ OUT_BATCH(gpgpu->batch, 0x04000000 | BASE_ADDRESS_MODIFY); /* General State Access Upper Bound */
+ OUT_BATCH(gpgpu->batch, 0x04000000 | BASE_ADDRESS_MODIFY); /* Dynamic State Access Upper Bound */
+ OUT_BATCH(gpgpu->batch, 0x04000000 | BASE_ADDRESS_MODIFY); /* Indirect Obj Access Upper Bound */
+ OUT_BATCH(gpgpu->batch, 0x04000000 | BASE_ADDRESS_MODIFY); /* Instruction Access Upper Bound */
+#else
+ OUT_BATCH(gpgpu->batch, 0 | BASE_ADDRESS_MODIFY);
+ OUT_BATCH(gpgpu->batch, 0 | BASE_ADDRESS_MODIFY);
+ OUT_BATCH(gpgpu->batch, 0 | BASE_ADDRESS_MODIFY);
+ OUT_BATCH(gpgpu->batch, 0 | BASE_ADDRESS_MODIFY);
+#endif /* USE_FULSIM */
+ ADVANCE_BATCH(gpgpu->batch);
+}
+
+static void
+intel_gpgpu_load_vfe_state(intel_gpgpu_t *gpgpu)
+{
+ BEGIN_BATCH(gpgpu->batch, 8);
+ OUT_BATCH(gpgpu->batch, CMD_MEDIA_STATE_POINTERS | (8-2));
+
+ gen6_vfe_state_inline_t* vfe = (gen6_vfe_state_inline_t*)
+ intel_batchbuffer_alloc_space(gpgpu->batch,0);
+
+ memset(vfe, 0, sizeof(struct gen6_vfe_state_inline));
+ vfe->vfe1.gpgpu_mode = 1;
+ vfe->vfe1.bypass_gateway_ctl = 1;
+ vfe->vfe1.reset_gateway_timer = 1;
+ vfe->vfe1.max_threads = gpgpu->max_threads - 1;
+ vfe->vfe1.urb_entries = 64;
+ vfe->vfe3.curbe_size = 480;
+ vfe->vfe4.scoreboard_mask = 0;
+ intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_vfe_state_inline_t));
+ ADVANCE_BATCH(gpgpu->batch);
+}
+
+static void
+intel_gpgpu_load_constant_buffer(intel_gpgpu_t *gpgpu)
+{
+ BEGIN_BATCH(gpgpu->batch, 4);
+ OUT_BATCH(gpgpu->batch, CMD(2,0,1) | (4 - 2)); /* length-2 */
+ OUT_BATCH(gpgpu->batch, 0); /* mbz */
+// XXX
+#if 1
+ OUT_BATCH(gpgpu->batch,
+ gpgpu->urb.size_cs_entry*
+ gpgpu->urb.num_cs_entries*32);
+#else
+ OUT_BATCH(gpgpu->batch, 5120);
+#endif
+ OUT_RELOC(gpgpu->batch, gpgpu->curbe_b.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ ADVANCE_BATCH(gpgpu->batch);
+}
+
+static void
+intel_gpgpu_load_idrt(intel_gpgpu_t *gpgpu)
+{
+ BEGIN_BATCH(gpgpu->batch, 4);
+ OUT_BATCH(gpgpu->batch, CMD(2,0,2) | (4 - 2)); /* length-2 */
+ OUT_BATCH(gpgpu->batch, 0); /* mbz */
+ OUT_BATCH(gpgpu->batch, 1 << 5);
+ OUT_RELOC(gpgpu->batch, gpgpu->idrt_b.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ ADVANCE_BATCH(gpgpu->batch);
+}
+
+static const uint32_t gpgpu_l3_config_reg1[] = {
+ 0x00080040, 0x02040040, 0x00800040, 0x01000038,
+ 0x02000030, 0x01000038, 0x00000038, 0x00000040,
+ 0x0A140091, 0x09100091, 0x08900091, 0x08900091
+};
+
+static const uint32_t gpgpu_l3_config_reg2[] = {
+ 0x00000000, 0x00000000, 0x00080410, 0x00080410,
+ 0x00040410, 0x00040420, 0x00080420, 0x00080020,
+ 0x00204080, 0x00244890, 0x00284490, 0x002444A0
+};
+
+static void
+intel_gpgpu_pipe_control(intel_gpgpu_t *gpgpu)
+{
+ BEGIN_BATCH(gpgpu->batch, SIZEOF32(gen6_pipe_control_t));
+ gen6_pipe_control_t* pc = (gen6_pipe_control_t*)
+ intel_batchbuffer_alloc_space(gpgpu->batch, 0);
+ memset(pc, 0, sizeof(*pc));
+ pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2;
+ pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL;
+ pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL;
+ pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D;
+ pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX;
+ pc->dw1.render_target_cache_flush_enable = 1;
+ pc->dw1.cs_stall = 1;
+ pc->dw1.dc_flush_enable = 1;
+ ADVANCE_BATCH(gpgpu->batch);
+}
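+
+/* This pipe control (render target cache flush + DC flush + CS stall) is
+ * reused below: it is emitted at batch start, right after the L3
+ * configuration is reprogrammed, and again at batch end when a flush is
+ * requested.
+ */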
+
+static void
+intel_gpgpu_set_L3(intel_gpgpu_t *gpgpu, uint32_t use_slm)
+{
+ BEGIN_BATCH(gpgpu->batch, 6);
+ OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+ OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG2_ADDRESS_OFFSET);
+ if (use_slm)
+ OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg1[8]);
+ else
+ OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg1[4]);
+
+ OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+ OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG3_ADDRESS_OFFSET);
+ if (use_slm)
+ OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[8]);
+ else
+ OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[4]);
+ ADVANCE_BATCH(gpgpu->batch);
+ intel_gpgpu_pipe_control(gpgpu);
+}
+
+static void
+intel_gpgpu_batch_start(intel_gpgpu_t *gpgpu)
+{
+ intel_batchbuffer_start_atomic(gpgpu->batch, 256);
+ intel_gpgpu_pipe_control(gpgpu);
+ intel_gpgpu_set_L3(gpgpu, gpgpu->ker->use_slm);
+ intel_gpgpu_select_pipeline(gpgpu);
+ intel_gpgpu_set_base_address(gpgpu);
+ intel_gpgpu_load_vfe_state(gpgpu);
+ intel_gpgpu_load_constant_buffer(gpgpu);
+ intel_gpgpu_load_idrt(gpgpu);
+
+ if (gpgpu->perf_b.bo) {
+ BEGIN_BATCH(gpgpu->batch, 3);
+ OUT_BATCH(gpgpu->batch,
+ (0x28 << 23) | /* MI_REPORT_PERF_COUNT */
+ (3 - 2)); /* length-2 */
+ OUT_RELOC(gpgpu->batch, gpgpu->perf_b.bo,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER,
+ 0 | /* Offset for the start "counters" */
+ 1); /* Use GTT and not PGTT */
+ OUT_BATCH(gpgpu->batch, 0);
+ ADVANCE_BATCH(gpgpu->batch);
+ }
+}
+
+static void
+intel_gpgpu_batch_end(intel_gpgpu_t *gpgpu, int32_t flush_mode)
+{
+ /* Insert the performance counter command */
+ if (gpgpu->perf_b.bo) {
+ BEGIN_BATCH(gpgpu->batch, 3);
+ OUT_BATCH(gpgpu->batch,
+ (0x28 << 23) | /* MI_REPORT_PERF_COUNT */
+ (3 - 2)); /* length-2 */
+ OUT_RELOC(gpgpu->batch, gpgpu->perf_b.bo,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER,
+ 512 | /* Offset for the end "counters" */
+ 1); /* Use GTT and not PGTT */
+ OUT_BATCH(gpgpu->batch, 0);
+ ADVANCE_BATCH(gpgpu->batch);
+ }
+
+ if(flush_mode) intel_gpgpu_pipe_control(gpgpu);
+ intel_batchbuffer_end_atomic(gpgpu->batch);
+}
+
+static void
+intel_gpgpu_batch_reset(intel_gpgpu_t *gpgpu, size_t sz)
+{
+ intel_batchbuffer_reset(gpgpu->batch, sz);
+}
+
+static void
+intel_gpgpu_flush(intel_gpgpu_t *gpgpu)
+{
+ intel_batchbuffer_flush(gpgpu->batch);
+}
+
+static void
+intel_gpgpu_state_init(intel_gpgpu_t *gpgpu,
+ uint32_t max_threads,
+ uint32_t size_cs_entry)
+{
+ drm_intel_bufmgr *bufmgr = gpgpu->drv->bufmgr;
+ drm_intel_bo *bo;
+
+ /* Binded buffers */
+ gpgpu->binded_n = 0;
+
+ /* URB */
+ gpgpu->urb.num_cs_entries = 64;
+ gpgpu->urb.size_cs_entry = size_cs_entry;
+ gpgpu->max_threads = max_threads;
+
+ /* Constant buffer */
+ if(gpgpu->curbe_b.bo)
+ dri_bo_unreference(gpgpu->curbe_b.bo);
+ uint32_t size_cb = gpgpu->urb.num_cs_entries * gpgpu->urb.size_cs_entry * 64;
+ size_cb = ALIGN(size_cb, 4096);
+ bo = dri_bo_alloc(gpgpu->drv->bufmgr, "CONSTANT_BUFFER", size_cb, 64);
+ assert(bo);
+ gpgpu->curbe_b.bo = bo;
+
+ /* surface state */
+ if(gpgpu->surface_heap_b.bo)
+ dri_bo_unreference(gpgpu->surface_heap_b.bo);
+ bo = dri_bo_alloc(bufmgr,
+ "SURFACE_HEAP",
+ sizeof(surface_heap_t),
+ 32);
+ assert(bo);
+ dri_bo_map(bo, 1);
+ memset(bo->virtual, 0, sizeof(surface_heap_t));
+ gpgpu->surface_heap_b.bo = bo;
+
+ /* Interface descriptor remap table */
+ if(gpgpu->idrt_b.bo)
+ dri_bo_unreference(gpgpu->idrt_b.bo);
+ bo = dri_bo_alloc(bufmgr,
+ "IDRT",
+ MAX_IF_DESC * sizeof(struct gen6_interface_descriptor),
+ 32);
+ assert(bo);
+ gpgpu->idrt_b.bo = bo;
+
+ /* vfe state */
+ if(gpgpu->vfe_state_b.bo)
+ dri_bo_unreference(gpgpu->vfe_state_b.bo);
+ gpgpu->vfe_state_b.bo = NULL;
+
+ /* sampler state */
+ if (gpgpu->sampler_state_b.bo)
+ dri_bo_unreference(gpgpu->sampler_state_b.bo);
+ bo = dri_bo_alloc(gpgpu->drv->bufmgr,
+ "SAMPLER_STATE",
+ GEN_MAX_SAMPLERS * sizeof(gen6_sampler_state_t),
+ 32);
+ assert(bo);
+ dri_bo_map(bo, 1);
+ memset(bo->virtual, 0, sizeof(gen6_sampler_state_t) * GEN_MAX_SAMPLERS);
+ gpgpu->sampler_state_b.bo = bo;
+
+ /* stack */
+ if (gpgpu->stack_b.bo)
+ dri_bo_unreference(gpgpu->stack_b.bo);
+ gpgpu->stack_b.bo = NULL;
+}
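+
+/* Illustrative sizing for the constant buffer allocated above, assuming the
+ * example value size_cs_entry = 2 (i.e. two 512-bit units per CS entry):
+ *   size_cb = num_cs_entries * size_cs_entry * 64
+ *           = 64 * 2 * 64 = 8192 bytes,
+ * which ALIGN(size_cb, 4096) leaves unchanged since it is already a multiple
+ * of 4KB.
+ */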
+
+static void
+intel_gpgpu_set_buf_reloc_gen7(intel_gpgpu_t *gpgpu, int32_t index, dri_bo* obj_bo)
+{
+ surface_heap_t *heap = gpgpu->surface_heap_b.bo->virtual;
+ heap->binding_table[index] = offsetof(surface_heap_t, surface) +
+ index * sizeof(gen7_surface_state_t);
+ dri_bo_emit_reloc(gpgpu->surface_heap_b.bo,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER,
+ 0,
+ heap->binding_table[index] +
+ offsetof(gen7_surface_state_t, ss1),
+ obj_bo);
+}
+
+/* Map the address space with two 2GB surfaces: one surface for untyped messages
+ * and one surface for byte scatters / gathers. The HW does not actually require
+ * two surfaces, but Fulsim complains otherwise.
+ */
+static void
+intel_gpgpu_map_address_space(intel_gpgpu_t *gpgpu)
+{
+ surface_heap_t *heap = gpgpu->surface_heap_b.bo->virtual;
+ gen7_surface_state_t *ss0 = (gen7_surface_state_t *) heap->surface[0];
+ gen7_surface_state_t *ss1 = (gen7_surface_state_t *) heap->surface[1];
+ memset(ss0, 0, sizeof(gen7_surface_state_t));
+ memset(ss1, 0, sizeof(gen7_surface_state_t));
+ ss1->ss0.surface_type = ss0->ss0.surface_type = I965_SURFACE_BUFFER;
+ ss1->ss0.surface_format = ss0->ss0.surface_format = I965_SURFACEFORMAT_RAW;
+ ss1->ss2.width = ss0->ss2.width = 127; /* bits 6:0 of sz */
+ ss1->ss2.height = ss0->ss2.height = 16383; /* bits 20:7 of sz */
+ ss0->ss3.depth = 1023; /* bits 30:21 of sz */
+ ss1->ss3.depth = 510; /* bits 30:21 of sz */
+ ss1->ss5.cache_control = ss0->ss5.cache_control = cc_llc_l3;
+ heap->binding_table[0] = offsetof(surface_heap_t, surface);
+ heap->binding_table[1] = sizeof(gen7_surface_state_t) + offsetof(surface_heap_t, surface);
+}
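+
+/* Size arithmetic for the two surfaces above, assuming the width/height/depth
+ * fields hold size-1 (which matches the "2GB" figure in the comment):
+ *   ss0: (127 + 1) * (16383 + 1) * (1023 + 1) = 2^7 * 2^14 * 2^10 = 2^31 = 2GB
+ *   ss1: (127 + 1) * (16383 + 1) * (510 + 1)  = 511 * 2^21 ~= 1GB
+ */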
+
+static void
+intel_gpgpu_bind_image2D_gen7(intel_gpgpu_t *gpgpu,
+ int32_t index,
+ dri_bo* obj_bo,
+ uint32_t format,
+ int32_t w,
+ int32_t h,
+ int32_t pitch,
+ int32_t tiling)
+{
+ surface_heap_t *heap = gpgpu->surface_heap_b.bo->virtual;
+ gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index];
+ memset(ss, 0, sizeof(*ss));
+ ss->ss0.surface_type = I965_SURFACE_2D;
+ ss->ss0.surface_format = format;
+ ss->ss1.base_addr = obj_bo->offset;
+ ss->ss2.width = w - 1;
+ ss->ss2.height = h - 1;
+ ss->ss3.pitch = pitch - 1;
+ ss->ss5.cache_control = cc_llc_l3;
+ if (tiling == GPGPU_TILE_X) {
+ ss->ss0.tiled_surface = 1;
+ ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
+ } else if (tiling == GPGPU_TILE_Y) {
+ ss->ss0.tiled_surface = 1;
+ ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
+ }
+ intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo);
+}
+
+static void
+intel_gpgpu_bind_buf(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t offset, uint32_t cchint)
+{
+ assert(gpgpu->binded_n < max_buf_n);
+ gpgpu->binded_buf[gpgpu->binded_n] = buf;
+ gpgpu->binded_offset[gpgpu->binded_n] = offset;
+ gpgpu->binded_n++;
+}
+
+static void
+intel_gpgpu_set_stack(intel_gpgpu_t *gpgpu, uint32_t offset, uint32_t size, uint32_t cchint)
+{
+ drm_intel_bufmgr *bufmgr = gpgpu->drv->bufmgr;
+ gpgpu->stack_b.bo = drm_intel_bo_alloc(bufmgr, "STACK", size, 64);
+ intel_gpgpu_bind_buf(gpgpu, gpgpu->stack_b.bo, offset, cchint);
+}
+
+static void
+intel_gpgpu_bind_image2D(intel_gpgpu_t *gpgpu,
+ int32_t index,
+ cl_buffer *obj_bo,
+ uint32_t format,
+ int32_t w,
+ int32_t h,
+ int32_t pitch,
+ cl_gpgpu_tiling tiling)
+{
+ assert(index < GEN_MAX_SURFACES);
+ intel_gpgpu_bind_image2D_gen7(gpgpu, index, (drm_intel_bo*) obj_bo, format, w, h, pitch, tiling);
+}
+
+static void
+intel_gpgpu_build_idrt(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
+{
+ gen6_interface_descriptor_t *desc;
+ drm_intel_bo *bo = NULL, *ker_bo = NULL;
+
+ bo = gpgpu->idrt_b.bo;
+ dri_bo_map(bo, 1);
+ assert(bo->virtual);
+ desc = (gen6_interface_descriptor_t*) bo->virtual;
+
+ memset(desc, 0, sizeof(*desc));
+ ker_bo = (drm_intel_bo *) kernel->bo;
+ desc->desc0.kernel_start_pointer = ker_bo->offset >> 6; /* reloc */
+ desc->desc1.single_program_flow = 1;
+ desc->desc2.sampler_state_pointer = gpgpu->sampler_state_b.bo->offset >> 5;
+ desc->desc3.binding_table_entry_count = 0; /* no prefetch */
+ desc->desc3.binding_table_pointer = 0;
+ desc->desc4.curbe_read_len = kernel->cst_sz / 32;
+ desc->desc4.curbe_read_offset = 0;
+
+ /* Barriers / SLM are automatically handled on Gen7+ */
+ if (gpgpu->drv->gen_ver == 7 || gpgpu->drv->gen_ver == 75) {
+ size_t slm_sz = kernel->slm_sz;
+ desc->desc5.group_threads_num = kernel->use_slm ? kernel->thread_n : 0;
+ desc->desc5.barrier_enable = kernel->use_slm;
+ if (slm_sz <= 4*KB)
+ slm_sz = 4*KB;
+ else if (slm_sz <= 8*KB)
+ slm_sz = 8*KB;
+ else if (slm_sz <= 16*KB)
+ slm_sz = 16*KB;
+ else if (slm_sz <= 32*KB)
+ slm_sz = 32*KB;
+ else if (slm_sz <= 64*KB)
+ slm_sz = 64*KB;
+ slm_sz = slm_sz >> 12;
+ desc->desc5.slm_sz = slm_sz;
+ }
+ else
+ desc->desc5.group_threads_num = kernel->barrierID; /* BarrierID on GEN6 */
+
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0,
+ offsetof(gen6_interface_descriptor_t, desc0),
+ ker_bo);
+
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0,
+ offsetof(gen6_interface_descriptor_t, desc2),
+ gpgpu->sampler_state_b.bo);
+ dri_bo_unmap(bo);
+}
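+
+/* Note on the SLM encoding above: desc5.slm_sz is expressed in 4KB units after
+ * rounding the requested size up to the next supported bucket (4/8/16/32/64KB).
+ * For example, a kernel asking for 10KB of SLM is rounded up to 16KB and
+ * encoded as 16*KB >> 12 = 4.
+ */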
+
+static void
+intel_gpgpu_upload_constants(intel_gpgpu_t *gpgpu, const void* data, uint32_t size)
+{
+ unsigned char *curbe = NULL;
+ cl_gpgpu_kernel *k = gpgpu->ker;
+ uint32_t i, j;
+
+ /* Upload the data first */
+ dri_bo_map(gpgpu->curbe_b.bo, 1);
+ assert(gpgpu->curbe_b.bo->virtual);
+ curbe = (unsigned char *) gpgpu->curbe_b.bo->virtual;
+ memcpy(curbe, data, size);
+
+ /* Now put all the relocations for our flat address space */
+ for (i = 0; i < k->thread_n; ++i)
+ for (j = 0; j < gpgpu->binded_n; ++j) {
+ *(uint32_t*)(curbe + gpgpu->binded_offset[j]+i*k->cst_sz) = gpgpu->binded_buf[j]->offset;
+ drm_intel_bo_emit_reloc(gpgpu->curbe_b.bo,
+ gpgpu->binded_offset[j]+i*k->cst_sz,
+ gpgpu->binded_buf[j],
+ 0,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER);
+ }
+ dri_bo_unmap(gpgpu->curbe_b.bo);
+}
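+
+/* In other words: the curbe buffer holds one cst_sz-byte copy of the constant
+ * data per hardware thread; for every bound buffer j and thread i, the GPU
+ * address of the buffer is patched in at binded_offset[j] + i * cst_sz, and a
+ * relocation is emitted so the kernel sees the final address at execution time.
+ */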
+
+static void
+intel_gpgpu_upload_samplers(intel_gpgpu_t *gpgpu, const void *data, uint32_t n)
+{
+ if (n) {
+ const size_t sz = n * sizeof(gen6_sampler_state_t);
+ memcpy(gpgpu->sampler_state_b.bo->virtual, data, sz);
+ }
+}
+
+static void
+intel_gpgpu_states_setup(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
+{
+ gpgpu->ker = kernel;
+ intel_gpgpu_build_idrt(gpgpu, kernel);
+ intel_gpgpu_map_address_space(gpgpu);
+ dri_bo_unmap(gpgpu->surface_heap_b.bo);
+ dri_bo_unmap(gpgpu->sampler_state_b.bo);
+}
+
+static void
+intel_gpgpu_set_perf_counters(intel_gpgpu_t *gpgpu, cl_buffer *perf)
+{
+ if (gpgpu->perf_b.bo)
+ drm_intel_bo_unreference(gpgpu->perf_b.bo);
+ drm_intel_bo_reference((drm_intel_bo*) perf);
+ gpgpu->perf_b.bo = (drm_intel_bo*) perf;
+}
+
+static void
+intel_gpgpu_walker(intel_gpgpu_t *gpgpu,
+ uint32_t simd_sz,
+ uint32_t thread_n,
+ const size_t global_wk_off[3],
+ const size_t global_wk_sz[3],
+ const size_t local_wk_sz[3])
+{
+ const uint32_t global_wk_dim[3] = {
+ global_wk_sz[0] / local_wk_sz[0],
+ global_wk_sz[1] / local_wk_sz[1],
+ global_wk_sz[2] / local_wk_sz[2]
+ };
+ assert(simd_sz == 8 || simd_sz == 16);
+ BEGIN_BATCH(gpgpu->batch, 11);
+ OUT_BATCH(gpgpu->batch, CMD_GPGPU_WALKER | 9);
+ OUT_BATCH(gpgpu->batch, 0); /* kernel index == 0 */
+ if (simd_sz == 16)
+ OUT_BATCH(gpgpu->batch, (1 << 30) | (thread_n-1)); /* SIMD16 | thread max */
+ else
+ OUT_BATCH(gpgpu->batch, (0 << 30) | (thread_n-1)); /* SIMD8 | thread max */
+ OUT_BATCH(gpgpu->batch, global_wk_off[0]);
+ OUT_BATCH(gpgpu->batch, global_wk_dim[0]);
+ OUT_BATCH(gpgpu->batch, global_wk_off[1]);
+ OUT_BATCH(gpgpu->batch, global_wk_dim[1]);
+ OUT_BATCH(gpgpu->batch, global_wk_off[2]);
+ OUT_BATCH(gpgpu->batch, global_wk_dim[2]);
+ OUT_BATCH(gpgpu->batch, ~0x0);
+ OUT_BATCH(gpgpu->batch, ~0x0);
+ ADVANCE_BATCH(gpgpu->batch);
+
+ BEGIN_BATCH(gpgpu->batch, 2);
+ OUT_BATCH(gpgpu->batch, CMD_MEDIA_STATE_FLUSH | 0);
+ OUT_BATCH(gpgpu->batch, 0); /* kernel index == 0 */
+ ADVANCE_BATCH(gpgpu->batch);
+}
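+
+/* Usage sketch (hypothetical numbers): for a 1024x1024x1 global size with a
+ * 16x16x1 work-group size, global_wk_dim ends up as 64x64x1 groups, and the
+ * command is emitted with bit 30 set for SIMD16 (or clear for SIMD8) together
+ * with thread_n - 1, the "thread max" noted in the comments above.
+ */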
+
+LOCAL void
+intel_set_gpgpu_callbacks(void)
+{
+ cl_gpgpu_new = (cl_gpgpu_new_cb *) intel_gpgpu_new;
+ cl_gpgpu_delete = (cl_gpgpu_delete_cb *) intel_gpgpu_delete;
+ cl_gpgpu_bind_image2D = (cl_gpgpu_bind_image2D_cb *) intel_gpgpu_bind_image2D;
+ cl_gpgpu_bind_buf = (cl_gpgpu_bind_buf_cb *) intel_gpgpu_bind_buf;
+ cl_gpgpu_set_stack = (cl_gpgpu_set_stack_cb *) intel_gpgpu_set_stack;
+ cl_gpgpu_state_init = (cl_gpgpu_state_init_cb *) intel_gpgpu_state_init;
+ cl_gpgpu_set_perf_counters = (cl_gpgpu_set_perf_counters_cb *) intel_gpgpu_set_perf_counters;
+ cl_gpgpu_upload_constants = (cl_gpgpu_upload_constants_cb *) intel_gpgpu_upload_constants;
+ cl_gpgpu_states_setup = (cl_gpgpu_states_setup_cb *) intel_gpgpu_states_setup;
+ cl_gpgpu_upload_samplers = (cl_gpgpu_upload_samplers_cb *) intel_gpgpu_upload_samplers;
+ cl_gpgpu_batch_reset = (cl_gpgpu_batch_reset_cb *) intel_gpgpu_batch_reset;
+ cl_gpgpu_batch_start = (cl_gpgpu_batch_start_cb *) intel_gpgpu_batch_start;
+ cl_gpgpu_batch_end = (cl_gpgpu_batch_end_cb *) intel_gpgpu_batch_end;
+ cl_gpgpu_flush = (cl_gpgpu_flush_cb *) intel_gpgpu_flush;
+ cl_gpgpu_walker = (cl_gpgpu_walker_cb *) intel_gpgpu_walker;
+}
+
diff --git a/src/intel/intel_gpgpu.h b/src/intel/intel_gpgpu.h
new file mode 100644
index 0000000..9918b35
--- /dev/null
+++ b/src/intel/intel_gpgpu.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ * Alexei Soupikov <alexei.soupikov at intel.com>
+ */
+
+#ifndef __INTEL_GPGPU_H__
+#define __INTEL_GPGPU_H__
+
+#include "cl_utils.h"
+#include "cl_driver.h"
+
+#include <stdlib.h>
+#include <stdint.h>
+
+/* Set the GPGPU-related callbacks */
+extern void intel_set_gpgpu_callbacks(void);
+
+#endif /* __INTEL_GPGPU_H__ */
+
diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h
new file mode 100644
index 0000000..ff339c5
--- /dev/null
+++ b/src/intel/intel_structs.h
@@ -0,0 +1,434 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/*
+ * Copyright 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __INTEL_STRUCTS_H__
+#define __INTEL_STRUCTS_H__
+
+#include <stdint.h>
+
+typedef struct gen6_interface_descriptor
+{
+ struct {
+ uint32_t pad6:6;
+ uint32_t kernel_start_pointer:26;
+ } desc0;
+
+ struct {
+ uint32_t pad:7;
+ uint32_t software_exception:1;
+ uint32_t pad2:3;
+ uint32_t maskstack_exception:1;
+ uint32_t pad3:1;
+ uint32_t illegal_opcode_exception:1;
+ uint32_t pad4:2;
+ uint32_t floating_point_mode:1;
+ uint32_t thread_priority:1;
+ uint32_t single_program_flow:1;
+ uint32_t pad5:1;
+ uint32_t pad6:6;
+ uint32_t pad7:6;
+ } desc1;
+
+ struct {
+ uint32_t pad:2;
+ uint32_t sampler_count:3;
+ uint32_t sampler_state_pointer:27;
+ } desc2;
+
+ struct {
+ uint32_t binding_table_entry_count:5; /* prefetch entries only */
+ uint32_t binding_table_pointer:27; /* 11 bits only on IVB+ */
+ } desc3;
+
+ struct {
+ uint32_t curbe_read_offset:16; /* in GRFs */
+ uint32_t curbe_read_len:16; /* in GRFs */
+ } desc4;
+
+ struct {
+ uint32_t group_threads_num:8; /* 0..64, 0 - no barrier use */
+ uint32_t barrier_return_byte:8;
+ uint32_t slm_sz:5; /* 0..16 - 0K..64K */
+ uint32_t barrier_enable:1;
+ uint32_t rounding_mode:2;
+ uint32_t barrier_return_grf_offset:8;
+ } desc5;
+
+ uint32_t desc6; /* unused */
+ uint32_t desc7; /* unused */
+} gen6_interface_descriptor_t;
+
+typedef struct gen6_surface_state
+{
+ struct {
+ uint32_t cube_pos_z:1;
+ uint32_t cube_neg_z:1;
+ uint32_t cube_pos_y:1;
+ uint32_t cube_neg_y:1;
+ uint32_t cube_pos_x:1;
+ uint32_t cube_neg_x:1;
+ uint32_t pad:2;
+ uint32_t render_cache_read_mode:1;
+ uint32_t cube_map_corner_mode:1;
+ uint32_t mipmap_layout_mode:1;
+ uint32_t vert_line_stride_ofs:1;
+ uint32_t vert_line_stride:1;
+ uint32_t color_blend:1;
+ uint32_t writedisable_blue:1;
+ uint32_t writedisable_green:1;
+ uint32_t writedisable_red:1;
+ uint32_t writedisable_alpha:1;
+ uint32_t surface_format:9;
+ uint32_t data_return_format:1;
+ uint32_t pad0:1;
+ uint32_t surface_type:3;
+ } ss0;
+
+ struct {
+ uint32_t base_addr;
+ } ss1;
+
+ struct {
+ uint32_t render_target_rotation:2;
+ uint32_t mip_count:4;
+ uint32_t width:13;
+ uint32_t height:13;
+ } ss2;
+
+ struct {
+ uint32_t tile_walk:1;
+ uint32_t tiled_surface:1;
+ uint32_t pad:1;
+ uint32_t pitch:18;
+ uint32_t depth:11;
+ } ss3;
+
+ struct {
+ uint32_t multisample_pos_index:3;
+ uint32_t pad:1;
+ uint32_t multisample_count:3;
+ uint32_t pad1:1;
+ uint32_t rt_view_extent:9;
+ uint32_t min_array_elt:11;
+ uint32_t min_lod:4;
+ } ss4;
+
+ struct {
+ uint32_t pad:16;
+ uint32_t cache_control:2; /* different values for GT and IVB */
+ uint32_t gfdt:1; /* allows selective flushing of LLC (e.g. for scanout) */
+ uint32_t encrypted_data:1;
+ uint32_t y_offset:4;
+ uint32_t vertical_alignment:1;
+ uint32_t x_offset:7;
+ } ss5;
+
+ uint32_t ss6; /* unused */
+ uint32_t ss7; /* unused */
+} gen6_surface_state_t;
+
+typedef struct gen7_surface_state
+{
+ struct {
+ uint32_t cube_pos_z:1;
+ uint32_t cube_neg_z:1;
+ uint32_t cube_pos_y:1;
+ uint32_t cube_neg_y:1;
+ uint32_t cube_pos_x:1;
+ uint32_t cube_neg_x:1;
+ uint32_t media_boundary_pixel_mode:2;
+ uint32_t render_cache_rw_mode:1;
+ uint32_t pad1:1;
+ uint32_t surface_array_spacing:1;
+ uint32_t vertical_line_stride_offset:1;
+ uint32_t vertical_line_stride:1;
+ uint32_t tile_walk:1;
+ uint32_t tiled_surface:1;
+ uint32_t horizontal_alignment:1;
+ uint32_t vertical_alignment:2;
+ uint32_t surface_format:9;
+ uint32_t pad0:1;
+ uint32_t surface_array:1;
+ uint32_t surface_type:3;
+ } ss0;
+
+ struct {
+ uint32_t base_addr;
+ } ss1;
+
+ struct {
+ uint32_t width:14;
+ uint32_t pad1:2;
+ uint32_t height:14;
+ uint32_t pad0:2;
+ } ss2;
+
+ struct {
+ uint32_t pitch:18;
+ uint32_t pad0:3;
+ uint32_t depth:11;
+ } ss3;
+
+ uint32_t ss4;
+
+ struct {
+ uint32_t mip_count:4;
+ uint32_t surface_min_load:4;
+ uint32_t pad2:6;
+ uint32_t coherence_type:1;
+ uint32_t stateless_force_write_thru:1;
+ uint32_t cache_control:4;
+ uint32_t y_offset:4;
+ uint32_t pad0:1;
+ uint32_t x_offset:7;
+ } ss5;
+
+ uint32_t ss6; /* unused */
+ uint32_t ss7; /* unused */
+} gen7_surface_state_t;
+
+STATIC_ASSERT(sizeof(gen6_surface_state_t) == sizeof(gen7_surface_state_t));
+static const size_t surface_state_sz = sizeof(gen6_surface_state_t);
+
+typedef struct gen6_vfe_state_inline
+{
+ struct {
+ uint32_t per_thread_scratch_space:4;
+ uint32_t pad3:3;
+ uint32_t extend_vfe_state_present:1;
+ uint32_t pad2:2;
+ uint32_t scratch_base:22;
+ } vfe0;
+
+ struct {
+ uint32_t debug_counter_control:2;
+ uint32_t gpgpu_mode:1; /* 0 for SNB!!! */
+ uint32_t gateway_mmio_access:2;
+ uint32_t fast_preempt:1;
+ uint32_t bypass_gateway_ctl:1; /* 0 - legacy, 1 - no open/close */
+ uint32_t reset_gateway_timer:1;
+ uint32_t urb_entries:8;
+ uint32_t max_threads:16;
+ } vfe1;
+
+ struct {
+ uint32_t pad8:8;
+ uint32_t debug_object_id:24;
+ } vfe2;
+
+ struct {
+ uint32_t curbe_size:16; /* in GRFs */
+ uint32_t urb_size:16; /* in GRFs */
+ } vfe3;
+
+ struct {
+ uint32_t scoreboard_mask:32; /* 1 - enable the corresponding dependency */
+ } vfe4;
+
+ struct {
+ uint32_t scoreboard0_dx:4;
+ uint32_t scoreboard0_dy:4;
+ uint32_t scoreboard1_dx:4;
+ uint32_t scoreboard1_dy:4;
+ uint32_t scoreboard2_dx:4;
+ uint32_t scoreboard2_dy:4;
+ uint32_t scoreboard3_dx:4;
+ uint32_t scoreboard3_dy:4;
+ } vfe5;
+
+ struct {
+ uint32_t scoreboard4_dx:4;
+ uint32_t scoreboard4_dy:4;
+ uint32_t scoreboard5_dx:4;
+ uint32_t scoreboard5_dy:4;
+ uint32_t scoreboard6_dx:4;
+ uint32_t scoreboard6_dy:4;
+ uint32_t scoreboard7_dx:4;
+ uint32_t scoreboard7_dy:4;
+ } vfe6;
+} gen6_vfe_state_inline_t;
+
+typedef struct gen6_pipe_control
+{
+ struct {
+ uint32_t length : BITFIELD_RANGE(0, 7);
+ uint32_t reserved : BITFIELD_RANGE(8, 15);
+ uint32_t instruction_subopcode : BITFIELD_RANGE(16, 23);
+ uint32_t instruction_opcode : BITFIELD_RANGE(24, 26);
+ uint32_t instruction_pipeline : BITFIELD_RANGE(27, 28);
+ uint32_t instruction_type : BITFIELD_RANGE(29, 31);
+ } dw0;
+
+ struct {
+ uint32_t depth_cache_flush_enable : BITFIELD_BIT(0);
+ uint32_t stall_at_pixel_scoreboard : BITFIELD_BIT(1);
+ uint32_t state_cache_invalidation_enable : BITFIELD_BIT(2);
+ uint32_t constant_cache_invalidation_enable : BITFIELD_BIT(3);
+ uint32_t vf_cache_invalidation_enable : BITFIELD_BIT(4);
+ uint32_t dc_flush_enable : BITFIELD_BIT(5);
+ uint32_t protected_memory_app_id : BITFIELD_BIT(6);
+ uint32_t pipe_control_flush_enable : BITFIELD_BIT(7);
+ uint32_t notify_enable : BITFIELD_BIT(8);
+ uint32_t indirect_state_pointers_disable : BITFIELD_BIT(9);
+ uint32_t texture_cache_invalidation_enable : BITFIELD_BIT(10);
+ uint32_t instruction_cache_invalidate_enable : BITFIELD_BIT(11);
+ uint32_t render_target_cache_flush_enable : BITFIELD_BIT(12);
+ uint32_t depth_stall_enable : BITFIELD_BIT(13);
+ uint32_t post_sync_operation : BITFIELD_RANGE(14, 15);
+ uint32_t generic_media_state_clear : BITFIELD_BIT(16);
+ uint32_t synchronize_gfdt_surface : BITFIELD_BIT(17);
+ uint32_t tlb_invalidate : BITFIELD_BIT(18);
+ uint32_t global_snapshot_count_reset : BITFIELD_BIT(19);
+ uint32_t cs_stall : BITFIELD_BIT(20);
+ uint32_t store_data_index : BITFIELD_BIT(21);
+ uint32_t protected_memory_enable : BITFIELD_BIT(22);
+ uint32_t reserved : BITFIELD_RANGE(23, 31);
+ } dw1;
+
+ struct {
+ uint32_t reserved : BITFIELD_RANGE(0, 1);
+ uint32_t destination_address_type : BITFIELD_BIT(2);
+ uint32_t address : BITFIELD_RANGE(3, 31);
+ } dw2;
+
+ struct {
+ uint64_t data;
+ } qw0;
+} gen6_pipe_control_t;
+
+typedef struct gen6_sampler_state
+{
+ struct {
+ uint32_t shadow_function:3;
+ uint32_t lod_bias:11;
+ uint32_t min_filter:3;
+ uint32_t mag_filter:3;
+ uint32_t mip_filter:2;
+ uint32_t base_level:5;
+ uint32_t min_mag_neq:1;
+ uint32_t lod_preclamp:1;
+ uint32_t default_color_mode:1;
+ uint32_t pad0:1;
+ uint32_t disable:1;
+ } ss0;
+
+ struct {
+ uint32_t r_wrap_mode:3;
+ uint32_t t_wrap_mode:3;
+ uint32_t s_wrap_mode:3;
+ uint32_t cube_control_mode:1;
+ uint32_t pad:2;
+ uint32_t max_lod:10;
+ uint32_t min_lod:10;
+ } ss1;
+
+ struct {
+ uint32_t pad:5;
+ uint32_t default_color_pointer:27;
+ } ss2;
+
+ struct {
+ uint32_t non_normalized_coord:1;
+ uint32_t pad:12;
+ uint32_t address_round:6;
+ uint32_t max_aniso:3;
+ uint32_t chroma_key_mode:1;
+ uint32_t chroma_key_index:2;
+ uint32_t chroma_key_enable:1;
+ uint32_t monochrome_filter_width:3;
+ uint32_t monochrome_filter_height:3;
+ } ss3;
+} gen6_sampler_state_t;
+
+typedef struct gen7_sampler_state
+{
+ struct {
+ uint32_t aniso_algorithm:1;
+ uint32_t lod_bias:13;
+ uint32_t min_filter:3;
+ uint32_t mag_filter:3;
+ uint32_t mip_filter:2;
+ uint32_t base_level:5;
+ uint32_t pad1:1;
+ uint32_t lod_preclamp:1;
+ uint32_t default_color_mode:1;
+ uint32_t pad0:1;
+ uint32_t disable:1;
+ } ss0;
+
+ struct {
+ uint32_t cube_control_mode:1;
+ uint32_t shadow_function:3;
+ uint32_t pad:4;
+ uint32_t max_lod:12;
+ uint32_t min_lod:12;
+ } ss1;
+
+ struct {
+ uint32_t pad:5;
+ uint32_t default_color_pointer:27;
+ } ss2;
+
+ struct {
+ uint32_t r_wrap_mode:3;
+ uint32_t t_wrap_mode:3;
+ uint32_t s_wrap_mode:3;
+ uint32_t pad:1;
+ uint32_t non_normalized_coord:1;
+ uint32_t trilinear_quality:2;
+ uint32_t address_round:6;
+ uint32_t max_aniso:3;
+ uint32_t chroma_key_mode:1;
+ uint32_t chroma_key_index:2;
+ uint32_t chroma_key_enable:1;
+ uint32_t pad0:6;
+ } ss3;
+} gen7_sampler_state_t;
+
+STATIC_ASSERT(sizeof(gen6_sampler_state_t) == sizeof(gen7_sampler_state_t));
+
+#undef BITFIELD_BIT
+#undef BITFIELD_RANGE
+
+#endif /* __INTEL_STRUCTS_H__ */
+
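Editor's note: each ssN/vfeN/dwN group above packs exactly one 32-bit dword, which is why the STATIC_ASSERTs can compare the gen6 and gen7 layouts byte for byte. A minimal sketch of how such a struct is typically filled and copied into a state buffer follows. The header name intel_structs.h is an assumption taken from the include guard, and the field values are placeholders, not real hardware encodings.

#include <assert.h>
#include <stdint.h>
#include <string.h>
#include "intel_structs.h"  /* assumed name, guard is __INTEL_STRUCTS_H__ */

/* Zero a gen7 sampler state, set a couple of named bit-fields and copy the
 * packed dwords into a dynamic state heap. */
void emit_gen7_sampler(uint32_t *state_heap)
{
  gen7_sampler_state_t ss;
  memset(&ss, 0, sizeof(ss));
  ss.ss0.min_filter = 0;            /* placeholder encoding */
  ss.ss0.mag_filter = 0;            /* placeholder encoding */
  ss.ss3.non_normalized_coord = 1;  /* placeholder: unnormalized coordinates */
  assert(sizeof(ss) == 4 * sizeof(uint32_t)); /* ss0..ss3 pack into 4 dwords */
  memcpy(state_heap, &ss, sizeof(ss));
}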
diff --git a/src/x11/Makefile b/src/x11/Makefile
new file mode 100644
index 0000000..c8f77f9
--- /dev/null
+++ b/src/x11/Makefile
@@ -0,0 +1,4 @@
+TOP=../..
+SUBDIRS=.
+
+include $(TOP)/Makefile.shared
diff --git a/src/x11/dricommon.c b/src/x11/dricommon.c
new file mode 100644
index 0000000..a43ff6f
--- /dev/null
+++ b/src/x11/dricommon.c
@@ -0,0 +1,329 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ * Note: the code is taken from libva code base
+ */
+
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <X11/Xlibint.h>
+#include <X11/Xlib.h>
+#include "x11/va_dri2.h"
+#include "x11/va_dri2tokens.h"
+#include "x11/dricommon.h"
+#include "cl_utils.h"
+#include "cl_alloc.h"
+
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <assert.h>
+
+#define LOCAL __attribute__ ((visibility ("internal")))
+
+LOCAL dri_drawable_t*
+dri_state_do_drawable_hash(dri_state_t *state, XID drawable)
+{
+ int index = drawable % DRAWABLE_HASH_SZ;
+ struct dri_drawable *dri_drawable = state->drawable_hash[index];
+
+ while (dri_drawable) {
+ if (dri_drawable->x_drawable == drawable)
+ return dri_drawable;
+ dri_drawable = dri_drawable->next;
+ }
+
+ dri_drawable = dri_state_create_drawable(state, drawable);
+ if (dri_drawable == NULL)
+ return NULL;
+ dri_drawable->x_drawable = drawable;
+ dri_drawable->next = state->drawable_hash[index];
+ state->drawable_hash[index] = dri_drawable;
+
+ return dri_drawable;
+}
+
+LOCAL void
+dri_state_free_drawable_hash(dri_state_t *state)
+{
+ int i;
+ struct dri_drawable *dri_drawable, *prev;
+
+ for (i = 0; i < DRAWABLE_HASH_SZ; i++) {
+ dri_drawable = state->drawable_hash[i];
+
+ while (dri_drawable) {
+ prev = dri_drawable;
+ dri_drawable = prev->next;
+ dri_state_destroy_drawable(state, prev);
+ }
+ }
+}
+
+LOCAL dri_drawable_t*
+dri_state_get_drawable(dri_state_t *state, XID drawable)
+{
+ return dri_state_do_drawable_hash(state, drawable);
+}
+
+LOCAL void
+dri_state_init_drawable_hash_table(dri_state_t *state)
+{
+ int i;
+ for(i=0; i < DRAWABLE_HASH_SZ; i++)
+ state->drawable_hash[i] = NULL;
+}
+
+LOCAL void
+dri_state_delete(dri_state_t *state)
+{
+ if (state == NULL)
+ return;
+ dri_state_close(state);
+ cl_free(state);
+}
+
+LOCAL dri_state_t*
+dri_state_new(void)
+{
+ dri_state_t *state = NULL;
+ TRY_ALLOC_NO_ERR (state, CALLOC(dri_state_t));
+ state->fd = -1;
+ state->driConnectedFlag = NONE;
+ dri_state_init_drawable_hash_table(state);
+
+exit:
+ return state;
+error:
+ dri_state_delete(state);
+ state = NULL;
+ goto exit;
+}
+
+#define __DRI_BUFFER_FRONT_LEFT 0
+#define __DRI_BUFFER_BACK_LEFT 1
+#define __DRI_BUFFER_FRONT_RIGHT 2
+#define __DRI_BUFFER_BACK_RIGHT 3
+#define __DRI_BUFFER_DEPTH 4
+#define __DRI_BUFFER_STENCIL 5
+#define __DRI_BUFFER_ACCUM 6
+#define __DRI_BUFFER_FAKE_FRONT_LEFT 7
+#define __DRI_BUFFER_FAKE_FRONT_RIGHT 8
+
+typedef struct dri2_drawable
+{
+ struct dri_drawable base;
+ union dri_buffer buffers[5];
+ int width;
+ int height;
+ int has_backbuffer;
+ int back_index;
+ int front_index;
+} dri2_drawable_t;
+
+LOCAL dri_drawable_t*
+dri_state_create_drawable(dri_state_t *state, XID x_drawable)
+{
+ dri2_drawable_t *dri2_drwble;
+ dri2_drwble = (dri2_drawable_t*)calloc(1, sizeof(*dri2_drwble));
+
+ if (!dri2_drwble)
+ return NULL;
+
+ dri2_drwble->base.x_drawable = x_drawable;
+ dri2_drwble->base.x = 0;
+ dri2_drwble->base.y = 0;
+ VA_DRI2CreateDrawable(state->x11_dpy, x_drawable);
+
+ return &dri2_drwble->base;
+}
+
+LOCAL void
+dri_state_destroy_drawable(dri_state_t *state, dri_drawable_t *dri_drwble)
+{
+ VA_DRI2DestroyDrawable(state->x11_dpy, dri_drwble->x_drawable);
+ free(dri_drwble);
+}
+
+LOCAL void
+dri_state_swap_buffer(dri_state_t *state, dri_drawable_t *dri_drwble)
+{
+ dri2_drawable_t *dri2_drwble = (dri2_drawable_t*)dri_drwble;
+ XRectangle xrect;
+ XserverRegion region;
+
+ if (dri2_drwble->has_backbuffer) {
+ xrect.x = 0;
+ xrect.y = 0;
+ xrect.width = dri2_drwble->width;
+ xrect.height = dri2_drwble->height;
+
+ region = XFixesCreateRegion(state->x11_dpy, &xrect, 1);
+ VA_DRI2CopyRegion(state->x11_dpy, dri_drwble->x_drawable, region,
+ DRI2BufferFrontLeft, DRI2BufferBackLeft);
+ XFixesDestroyRegion(state->x11_dpy, region);
+ }
+}
+
+LOCAL union dri_buffer*
+dri_state_get_rendering_buffer(dri_state_t *state, dri_drawable_t *dri_drwble)
+{
+ dri2_drawable_t *dri2_drwble = (dri2_drawable_t *)dri_drwble;
+ int i;
+ int count;
+ unsigned int attachments[5];
+ VA_DRI2Buffer *buffers;
+
+ i = 0;
+ attachments[i++] = __DRI_BUFFER_BACK_LEFT;
+ attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
+ buffers = VA_DRI2GetBuffers(state->x11_dpy,
+ dri_drwble->x_drawable,
+ &dri2_drwble->width,
+ &dri2_drwble->height,
+ attachments,
+ i,
+ &count);
+ assert(buffers);
+ if (buffers == NULL)
+ return NULL;
+
+ dri2_drwble->has_backbuffer = 0;
+
+ for (i = 0; i < count; i++) {
+ dri2_drwble->buffers[i].dri2.attachment = buffers[i].attachment;
+ dri2_drwble->buffers[i].dri2.name = buffers[i].name;
+ dri2_drwble->buffers[i].dri2.pitch = buffers[i].pitch;
+ dri2_drwble->buffers[i].dri2.cpp = buffers[i].cpp;
+ dri2_drwble->buffers[i].dri2.flags = buffers[i].flags;
+
+ if (buffers[i].attachment == __DRI_BUFFER_BACK_LEFT) {
+ dri2_drwble->has_backbuffer = 1;
+ dri2_drwble->back_index = i;
+ }
+
+ if (buffers[i].attachment == __DRI_BUFFER_FRONT_LEFT)
+ dri2_drwble->front_index = i;
+ }
+
+ dri_drwble->width = dri2_drwble->width;
+ dri_drwble->height = dri2_drwble->height;
+ Xfree(buffers);
+
+ if (dri2_drwble->has_backbuffer)
+ return &dri2_drwble->buffers[dri2_drwble->back_index];
+
+ return &dri2_drwble->buffers[dri2_drwble->front_index];
+}
+
+LOCAL void
+dri_state_close(dri_state_t *state) {
+ dri_state_free_drawable_hash(state);
+ assert(state->fd >= 0);
+ close(state->fd);
+}
+
+LOCAL void
+dri_state_release(dri_state_t *state) {
+ dri_state_delete(state);
+}
+
+LOCAL dri_state_t*
+getDRI2State(Display* dpy, int screen, char **driver_name)
+{
+ int major, minor;
+ int error_base;
+ int event_base;
+ char *device_name = NULL;
+ drm_magic_t magic;
+ char * internal_driver_name = NULL;
+ int fd = -1;
+ dri_state_t* state = NULL;
+
+ if (!VA_DRI2QueryExtension(dpy, &event_base, &error_base))
+ goto err_out;
+
+ if (!VA_DRI2QueryVersion(dpy, &major, &minor))
+ goto err_out;
+
+ if (!VA_DRI2Connect(dpy, RootWindow(dpy, screen),
+ &internal_driver_name, &device_name))
+ goto err_out;
+
+ fd = open(device_name, O_RDWR);
+ assert(fd >= 0);
+
+ if (fd < 0)
+ goto err_out;
+
+ if (drmGetMagic(fd, &magic))
+ goto err_out;
+
+ if (!VA_DRI2Authenticate(dpy, RootWindow(dpy, screen),
+ magic))
+ goto err_out;
+
+ if(driver_name)
+ *driver_name = internal_driver_name;
+ else
+ Xfree(internal_driver_name);
+
+ state = dri_state_new();
+ state->fd = fd;
+ state->x11_dpy = dpy;
+ state->x11_screen = screen;
+ state->driConnectedFlag = DRI2;
+ if (device_name)
+ Xfree(device_name);
+ return state;
+
+err_out:
+ if (device_name)
+ Xfree(device_name);
+
+ if (internal_driver_name)
+ Xfree(internal_driver_name);
+
+ if (driver_name)
+ *driver_name = NULL;
+
+ if (fd >= 0)
+ close(fd);
+
+ return state;
+}
+
diff --git a/src/x11/dricommon.h b/src/x11/dricommon.h
new file mode 100644
index 0000000..5a950b4
--- /dev/null
+++ b/src/x11/dricommon.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ * Note: the code is taken from libva code base
+ */
+
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _VA_DRICOMMON_H_
+#define _VA_DRICOMMON_H_
+
+#include <X11/Xlib.h>
+#include <xf86drm.h>
+#include <drm.h>
+#include <drm_sarea.h>
+
+union dri_buffer
+{
+ struct {
+ unsigned int attachment;
+ unsigned int name;
+ unsigned int pitch;
+ unsigned int cpp;
+ unsigned int flags;
+ } dri2;
+};
+
+typedef struct dri_drawable
+{
+ XID x_drawable;
+ int x;
+ int y;
+ unsigned int width;
+ unsigned int height;
+ struct dri_drawable *next;
+} dri_drawable_t;
+
+#define DRAWABLE_HASH_SZ 32
+
+enum DRI_VER
+{
+ NONE = 0,
+ // NOT supported VA_DRI1 = 1,
+ DRI2 = 2
+};
+
+typedef struct dri_state
+{
+ Display *x11_dpy;
+ int x11_screen;
+ int fd;
+ enum DRI_VER driConnectedFlag; /* 0: disconnected, 2: DRI2 */
+ dri_drawable_t *drawable_hash[DRAWABLE_HASH_SZ];
+} dri_state_t;
+
+dri_drawable_t *dri_state_create_drawable(dri_state_t*, XID x_drawable);
+void dri_state_destroy_drawable(dri_state_t*, dri_drawable_t*);
+void dri_state_close(dri_state_t*);
+void dri_state_release(dri_state_t*);
+
+// Create a dri2 state from dpy and screen
+dri_state_t *getDRI2State(Display* dpy, int screen, char **driver_name);
+
+#endif /* _VA_DRICOMMON_H_ */
+
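Editor's note: a minimal usage sketch for the API declared above follows, assuming an X server with the DRI2 extension is running and the header is reachable as "x11/dricommon.h" (the path dricommon.c itself uses). These entry points are built with internal visibility, so this pattern only applies inside the library; the sketch is illustrative, not part of the patch, and keeps error handling to the bare minimum.

#include <stdio.h>
#include <X11/Xlib.h>
#include "x11/dricommon.h"

int main(void)
{
  char *driver_name = NULL;
  Display *dpy = XOpenDisplay(NULL);
  if (dpy == NULL)
    return 1;

  /* Connect to DRI2, authenticate and open the DRM device. */
  dri_state_t *state = getDRI2State(dpy, DefaultScreen(dpy), &driver_name);
  if (state == NULL || state->driConnectedFlag != DRI2) {
    fprintf(stderr, "DRI2 connection failed\n");
    XCloseDisplay(dpy);
    return 1;
  }

  printf("DRI2 driver %s, drm fd %d\n", driver_name, state->fd);
  XFree(driver_name);

  dri_state_release(state); /* closes the drm fd and frees the drawable hash */
  XCloseDisplay(dpy);
  return 0;
}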
diff --git a/src/x11/va_dri2.c b/src/x11/va_dri2.c
new file mode 100644
index 0000000..5225acd
--- /dev/null
+++ b/src/x11/va_dri2.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/*
+ * Copyright © 2008 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Soft-
+ * ware"), to deal in the Software without restriction, including without
+ * limitation the rights to use, copy, modify, merge, publish, distribute,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, provided that the above copyright
+ * notice(s) and this permission notice appear in all copies of the Soft-
+ * ware and that both the above copyright notice(s) and this permission
+ * notice appear in supporting documentation.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
+ * ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY
+ * RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN
+ * THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSE-
+ * QUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFOR-
+ * MANCE OF THIS SOFTWARE.
+ *
+ * Except as contained in this notice, the name of a copyright holder shall
+ * not be used in advertising or otherwise to promote the sale, use or
+ * other dealings in this Software without prior written authorization of
+ * the copyright holder.
+ *
+ * Authors:
+ * Kristian Høgsberg (krh at redhat.com)
+ */
+
+#define NEED_REPLIES
+#include <X11/Xlibint.h>
+#include <X11/extensions/Xext.h>
+#include <X11/extensions/extutil.h>
+#include "xf86drm.h"
+#include "x11/va_dri2.h"
+#include "x11/va_dri2str.h"
+#include "x11/va_dri2tokens.h"
+
+#ifndef DRI2DriverDRI
+#define DRI2DriverDRI 0
+#endif
+
+#define LOCAL __attribute__ ((visibility ("internal")))
+
+static char va_dri2ExtensionName[] = DRI2_NAME;
+static XExtensionInfo _va_dri2_info_data;
+static XExtensionInfo *va_dri2Info = &_va_dri2_info_data;
+static XEXT_GENERATE_CLOSE_DISPLAY (VA_DRI2CloseDisplay, va_dri2Info)
+static /* const */ XExtensionHooks va_dri2ExtensionHooks = {
+ NULL, /* create_gc */
+ NULL, /* copy_gc */
+ NULL, /* flush_gc */
+ NULL, /* free_gc */
+ NULL, /* create_font */
+ NULL, /* free_font */
+ VA_DRI2CloseDisplay, /* close_display */
+ NULL, /* wire_to_event */
+ NULL, /* event_to_wire */
+ NULL, /* error */
+ NULL, /* error_string */
+};
+
+static XEXT_GENERATE_FIND_DISPLAY (DRI2FindDisplay, va_dri2Info,
+ va_dri2ExtensionName,
+ &va_dri2ExtensionHooks,
+ 0, NULL)
+
+LOCAL Bool VA_DRI2QueryExtension(Display *dpy, int *eventBase, int *errorBase)
+{
+ XExtDisplayInfo *info = DRI2FindDisplay(dpy);
+
+ if (XextHasExtension(info)) {
+ *eventBase = info->codes->first_event;
+ *errorBase = info->codes->first_error;
+ return True;
+ }
+
+ return False;
+}
+
+LOCAL Bool VA_DRI2QueryVersion(Display *dpy, int *major, int *minor)
+{
+ XExtDisplayInfo *info = DRI2FindDisplay (dpy);
+ xDRI2QueryVersionReply rep;
+ xDRI2QueryVersionReq *req;
+
+ XextCheckExtension (dpy, info, va_dri2ExtensionName, False);
+
+ LockDisplay(dpy);
+ GetReq(DRI2QueryVersion, req);
+ req->reqType = info->codes->major_opcode;
+ req->dri2Reqtype = X_DRI2QueryVersion;
+ req->majorVersion = DRI2_MAJOR;
+ req->minorVersion = DRI2_MINOR;
+ if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) {
+ UnlockDisplay(dpy);
+ SyncHandle();
+ return False;
+ }
+ *major = rep.majorVersion;
+ *minor = rep.minorVersion;
+ UnlockDisplay(dpy);
+ SyncHandle();
+
+ return True;
+}
+
+LOCAL Bool VA_DRI2Connect(Display *dpy, XID window,
+ char **driverName, char **deviceName)
+{
+ XExtDisplayInfo *info = DRI2FindDisplay(dpy);
+ xDRI2ConnectReply rep;
+ xDRI2ConnectReq *req;
+
+ XextCheckExtension (dpy, info, va_dri2ExtensionName, False);
+
+ LockDisplay(dpy);
+ GetReq(DRI2Connect, req);
+ req->reqType = info->codes->major_opcode;
+ req->dri2Reqtype = X_DRI2Connect;
+ req->window = window;
+ req->drivertype = DRI2DriverDRI;
+ if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) {
+ UnlockDisplay(dpy);
+ SyncHandle();
+ return False;
+ }
+
+ if (rep.driverNameLength == 0 && rep.deviceNameLength == 0) {
+ UnlockDisplay(dpy);
+ SyncHandle();
+ return False;
+ }
+
+ *driverName = Xmalloc(rep.driverNameLength + 1);
+ if (*driverName == NULL) {
+ _XEatData(dpy,
+ ((rep.driverNameLength + 3) & ~3) +
+ ((rep.deviceNameLength + 3) & ~3));
+ UnlockDisplay(dpy);
+ SyncHandle();
+ return False;
+ }
+ _XReadPad(dpy, *driverName, rep.driverNameLength);
+ (*driverName)[rep.driverNameLength] = '\0';
+
+ *deviceName = Xmalloc(rep.deviceNameLength + 1);
+ if (*deviceName == NULL) {
+ Xfree(*driverName);
+ _XEatData(dpy, ((rep.deviceNameLength + 3) & ~3));
+ UnlockDisplay(dpy);
+ SyncHandle();
+ return False;
+ }
+ _XReadPad(dpy, *deviceName, rep.deviceNameLength);
+ (*deviceName)[rep.deviceNameLength] = '\0';
+
+ UnlockDisplay(dpy);
+ SyncHandle();
+
+ return True;
+}
+
+LOCAL Bool VA_DRI2Authenticate(Display *dpy, XID window, drm_magic_t magic)
+{
+ XExtDisplayInfo *info = DRI2FindDisplay(dpy);
+ xDRI2AuthenticateReq *req;
+ xDRI2AuthenticateReply rep;
+
+ XextCheckExtension (dpy, info, va_dri2ExtensionName, False);
+
+ LockDisplay(dpy);
+ GetReq(DRI2Authenticate, req);
+ req->reqType = info->codes->major_opcode;
+ req->dri2Reqtype = X_DRI2Authenticate;
+ req->window = window;
+ req->magic = magic;
+
+ if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) {
+ UnlockDisplay(dpy);
+ SyncHandle();
+ return False;
+ }
+
+ UnlockDisplay(dpy);
+ SyncHandle();
+
+ return rep.authenticated;
+}
+
+LOCAL void VA_DRI2CreateDrawable(Display *dpy, XID drawable)
+{
+ XExtDisplayInfo *info = DRI2FindDisplay(dpy);
+ xDRI2CreateDrawableReq *req;
+
+ XextSimpleCheckExtension (dpy, info, va_dri2ExtensionName);
+
+ LockDisplay(dpy);
+ GetReq(DRI2CreateDrawable, req);
+ req->reqType = info->codes->major_opcode;
+ req->dri2Reqtype = X_DRI2CreateDrawable;
+ req->drawable = drawable;
+ UnlockDisplay(dpy);
+ SyncHandle();
+}
+
+LOCAL void VA_DRI2DestroyDrawable(Display *dpy, XID drawable)
+{
+ XExtDisplayInfo *info = DRI2FindDisplay(dpy);
+ xDRI2DestroyDrawableReq *req;
+
+ XextSimpleCheckExtension (dpy, info, va_dri2ExtensionName);
+
+ XSync(dpy, False);
+
+ LockDisplay(dpy);
+ GetReq(DRI2DestroyDrawable, req);
+ req->reqType = info->codes->major_opcode;
+ req->dri2Reqtype = X_DRI2DestroyDrawable;
+ req->drawable = drawable;
+ UnlockDisplay(dpy);
+ SyncHandle();
+}
+
+LOCAL VA_DRI2Buffer *VA_DRI2GetBuffers(Display *dpy, XID drawable,
+ int *width, int *height,
+ unsigned int *attachments, int count,
+ int *outcount)
+{
+ XExtDisplayInfo *info = DRI2FindDisplay(dpy);
+ xDRI2GetBuffersReply rep;
+ xDRI2GetBuffersReq *req;
+ VA_DRI2Buffer *buffers;
+ xDRI2Buffer repBuffer;
+ CARD32 *p;
+ int i;
+
+ XextCheckExtension (dpy, info, va_dri2ExtensionName, False);
+
+ LockDisplay(dpy);
+ GetReqExtra(DRI2GetBuffers, count * 4, req);
+ req->reqType = info->codes->major_opcode;
+ req->dri2Reqtype = X_DRI2GetBuffers;
+ req->drawable = drawable;
+ req->count = count;
+ p = (CARD32 *) &req[1];
+ for (i = 0; i < count; i++)
+ p[i] = attachments[i];
+
+ if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) {
+ UnlockDisplay(dpy);
+ SyncHandle();
+ return NULL;
+ }
+
+ *width = rep.width;
+ *height = rep.height;
+ *outcount = rep.count;
+
+ buffers = Xmalloc(rep.count * sizeof buffers[0]);
+ if (buffers == NULL) {
+ _XEatData(dpy, rep.count * sizeof repBuffer);
+ UnlockDisplay(dpy);
+ SyncHandle();
+ return NULL;
+ }
+
+ for (i = 0; i < (int) rep.count; i++) {
+ _XReadPad(dpy, (char *) &repBuffer, sizeof repBuffer);
+ buffers[i].attachment = repBuffer.attachment;
+ buffers[i].name = repBuffer.name;
+ buffers[i].pitch = repBuffer.pitch;
+ buffers[i].cpp = repBuffer.cpp;
+ buffers[i].flags = repBuffer.flags;
+ }
+
+ UnlockDisplay(dpy);
+ SyncHandle();
+
+ return buffers;
+}
+
+LOCAL void VA_DRI2CopyRegion(Display *dpy, XID drawable, XserverRegion region,
+ CARD32 dest, CARD32 src)
+{
+ XExtDisplayInfo *info = DRI2FindDisplay(dpy);
+ xDRI2CopyRegionReq *req;
+ xDRI2CopyRegionReply rep;
+
+ XextSimpleCheckExtension (dpy, info, va_dri2ExtensionName);
+
+ LockDisplay(dpy);
+ GetReq(DRI2CopyRegion, req);
+ req->reqType = info->codes->major_opcode;
+ req->dri2Reqtype = X_DRI2CopyRegion;
+ req->drawable = drawable;
+ req->region = region;
+ req->dest = dest;
+ req->src = src;
+
+ _XReply(dpy, (xReply *)&rep, 0, xFalse);
+
+ UnlockDisplay(dpy);
+ SyncHandle();
+}
diff --git a/src/x11/va_dri2.h b/src/x11/va_dri2.h
new file mode 100644
index 0000000..1a1f96e
--- /dev/null
+++ b/src/x11/va_dri2.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/*
+ * Copyright © 2007,2008 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Soft-
+ * ware"), to deal in the Software without restriction, including without
+ * limitation the rights to use, copy, modify, merge, publish, distribute,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, provided that the above copyright
+ * notice(s) and this permission notice appear in all copies of the Soft-
+ * ware and that both the above copyright notice(s) and this permission
+ * notice appear in supporting documentation.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
+ * ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY
+ * RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN
+ * THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSE-
+ * QUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFOR-
+ * MANCE OF THIS SOFTWARE.
+ *
+ * Except as contained in this notice, the name of a copyright holder shall
+ * not be used in advertising or otherwise to promote the sale, use or
+ * other dealings in this Software without prior written authorization of
+ * the copyright holder.
+ *
+ * Authors:
+ * Kristian Høgsberg (krh at redhat.com)
+ */
+#ifndef _VA_DRI2_H_
+#define _VA_DRI2_H_
+
+#include <X11/extensions/Xfixes.h>
+#include <X11/Xfuncproto.h>
+#include <xf86drm.h>
+
+typedef struct {
+ unsigned int attachment;
+ unsigned int name;
+ unsigned int pitch;
+ unsigned int cpp;
+ unsigned int flags;
+} VA_DRI2Buffer;
+
+extern Bool
+VA_DRI2QueryExtension(Display *display, int *eventBase, int *errorBase);
+extern Bool
+VA_DRI2QueryVersion(Display *display, int *major, int *minor);
+extern Bool
+VA_DRI2Connect(Display *display, XID window,
+ char **driverName, char **deviceName);
+extern Bool
+VA_DRI2Authenticate(Display *display, XID window, drm_magic_t magic);
+extern void
+VA_DRI2CreateDrawable(Display *display, XID drawable);
+extern void
+VA_DRI2DestroyDrawable(Display *display, XID handle);
+extern VA_DRI2Buffer *
+VA_DRI2GetBuffers(Display *dpy, XID drawable,
+ int *width, int *height,
+ unsigned int *attachments, int count,
+ int *outcount);
+#if 1
+extern void
+VA_DRI2CopyRegion(Display *dpy, XID drawable, XserverRegion region,
+ CARD32 dest, CARD32 src);
+#endif
+#endif
diff --git a/src/x11/va_dri2str.h b/src/x11/va_dri2str.h
new file mode 100644
index 0000000..db10e16
--- /dev/null
+++ b/src/x11/va_dri2str.h
@@ -0,0 +1,211 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/*
+ * Copyright © 2008 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Soft-
+ * ware"), to deal in the Software without restriction, including without
+ * limitation the rights to use, copy, modify, merge, publish, distribute,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, provided that the above copyright
+ * notice(s) and this permission notice appear in all copies of the Soft-
+ * ware and that both the above copyright notice(s) and this permission
+ * notice appear in supporting documentation.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
+ * ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY
+ * RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN
+ * THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSE-
+ * QUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFOR-
+ * MANCE OF THIS SOFTWARE.
+ *
+ * Except as contained in this notice, the name of a copyright holder shall
+ * not be used in advertising or otherwise to promote the sale, use or
+ * other dealings in this Software without prior written authorization of
+ * the copyright holder.
+ *
+ * Authors:
+ * Kristian Høgsberg (krh at redhat.com)
+ */
+#ifndef _DRI2_PROTO_H_
+#define _DRI2_PROTO_H_
+
+#define DRI2_NAME "DRI2"
+#define DRI2_MAJOR 1
+#define DRI2_MINOR 0
+
+#define DRI2NumberErrors 0
+#define DRI2NumberEvents 0
+#define DRI2NumberRequests 7
+
+#define X_DRI2QueryVersion 0
+#define X_DRI2Connect 1
+#define X_DRI2Authenticate 2
+#define X_DRI2CreateDrawable 3
+#define X_DRI2DestroyDrawable 4
+#define X_DRI2GetBuffers 5
+#define X_DRI2CopyRegion 6
+
+typedef struct {
+ CARD32 attachment B32;
+ CARD32 name B32;
+ CARD32 pitch B32;
+ CARD32 cpp B32;
+ CARD32 flags B32;
+} xDRI2Buffer;
+
+typedef struct {
+ CARD8 reqType;
+ CARD8 dri2Reqtype;
+ CARD16 length B16;
+ CARD32 majorVersion B32;
+ CARD32 minorVersion B32;
+} xDRI2QueryVersionReq;
+#define sz_xDRI2QueryVersionReq 12
+
+typedef struct {
+ BYTE type; /* X_Reply */
+ BYTE pad1;
+ CARD16 sequenceNumber B16;
+ CARD32 length B32;
+ CARD32 majorVersion B32;
+ CARD32 minorVersion B32;
+ CARD32 pad2 B32;
+ CARD32 pad3 B32;
+ CARD32 pad4 B32;
+ CARD32 pad5 B32;
+} xDRI2QueryVersionReply;
+#define sz_xDRI2QueryVersionReply 32
+
+typedef struct {
+ CARD8 reqType;
+ CARD8 dri2Reqtype;
+ CARD16 length B16;
+ CARD32 window B32;
+ CARD32 drivertype B32;
+} xDRI2ConnectReq;
+#define sz_xDRI2ConnectReq 12
+
+typedef struct {
+ BYTE type; /* X_Reply */
+ BYTE pad1;
+ CARD16 sequenceNumber B16;
+ CARD32 length B32;
+ CARD32 driverNameLength B32;
+ CARD32 deviceNameLength B32;
+ CARD32 pad2 B32;
+ CARD32 pad3 B32;
+ CARD32 pad4 B32;
+ CARD32 pad5 B32;
+} xDRI2ConnectReply;
+#define sz_xDRI2ConnectReply 32
+
+typedef struct {
+ CARD8 reqType;
+ CARD8 dri2Reqtype;
+ CARD16 length B16;
+ CARD32 window B32;
+ CARD32 magic B32;
+} xDRI2AuthenticateReq;
+#define sz_xDRI2AuthenticateReq 12
+
+typedef struct {
+ BYTE type; /* X_Reply */
+ BYTE pad1;
+ CARD16 sequenceNumber B16;
+ CARD32 length B32;
+ CARD32 authenticated B32;
+ CARD32 pad2 B32;
+ CARD32 pad3 B32;
+ CARD32 pad4 B32;
+ CARD32 pad5 B32;
+ CARD32 pad6 B32;
+} xDRI2AuthenticateReply;
+#define sz_xDRI2AuthenticateReply 32
+
+typedef struct {
+ CARD8 reqType;
+ CARD8 dri2Reqtype;
+ CARD16 length B16;
+ CARD32 drawable B32;
+} xDRI2CreateDrawableReq;
+#define sz_xDRI2CreateDrawableReq 8
+
+typedef struct {
+ CARD8 reqType;
+ CARD8 dri2Reqtype;
+ CARD16 length B16;
+ CARD32 drawable B32;
+} xDRI2DestroyDrawableReq;
+#define sz_xDRI2DestroyDrawableReq 8
+
+typedef struct {
+ CARD8 reqType;
+ CARD8 dri2Reqtype;
+ CARD16 length B16;
+ CARD32 drawable B32;
+ CARD32 count B32;
+} xDRI2GetBuffersReq;
+#define sz_xDRI2GetBuffersReq 12
+
+typedef struct {
+ BYTE type; /* X_Reply */
+ BYTE pad1;
+ CARD16 sequenceNumber B16;
+ CARD32 length B32;
+ CARD32 width B32;
+ CARD32 height B32;
+ CARD32 count B32;
+ CARD32 pad2 B32;
+ CARD32 pad3 B32;
+ CARD32 pad4 B32;
+} xDRI2GetBuffersReply;
+#define sz_xDRI2GetBuffersReply 32
+
+typedef struct {
+ CARD8 reqType;
+ CARD8 dri2Reqtype;
+ CARD16 length B16;
+ CARD32 drawable B32;
+ CARD32 region B32;
+ CARD32 dest B32;
+ CARD32 src B32;
+} xDRI2CopyRegionReq;
+#define sz_xDRI2CopyRegionReq 20
+
+typedef struct {
+ BYTE type; /* X_Reply */
+ BYTE pad1;
+ CARD16 sequenceNumber B16;
+ CARD32 length B32;
+ CARD32 pad2 B32;
+ CARD32 pad3 B32;
+ CARD32 pad4 B32;
+ CARD32 pad5 B32;
+ CARD32 pad6 B32;
+ CARD32 pad7 B32;
+} xDRI2CopyRegionReply;
+#define sz_xDRI2CopyRegionReply 32
+
+#endif
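Editor's note: the sz_* constants above are the exact on-the-wire byte counts of the corresponding request and reply structs. The small sketch below sanity-checks a few of them; it assumes the header is reachable as "x11/va_dri2str.h" and that, as in the standard Xproto headers, the B16/B32 swap annotations from <X11/Xmd.h> expand to nothing.

#include <assert.h>
#include <X11/Xmd.h>        /* CARD8/CARD16/CARD32, BYTE, B16/B32 */
#include "x11/va_dri2str.h"

int main(void)
{
  /* requests: 1 + 1 + 2 + 4 + 4 bytes */
  assert(sizeof(xDRI2QueryVersionReq) == sz_xDRI2QueryVersionReq);
  assert(sizeof(xDRI2ConnectReq)      == sz_xDRI2ConnectReq);
  /* replies: 32-byte fixed-size header, like every core X reply */
  assert(sizeof(xDRI2ConnectReply)    == sz_xDRI2ConnectReply);
  assert(sizeof(xDRI2GetBuffersReply) == sz_xDRI2GetBuffersReply);
  return 0;
}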
diff --git a/src/x11/va_dri2tokens.h b/src/x11/va_dri2tokens.h
new file mode 100644
index 0000000..d3c31f3
--- /dev/null
+++ b/src/x11/va_dri2tokens.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/*
+ * Copyright © 2008 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Soft-
+ * ware"), to deal in the Software without restriction, including without
+ * limitation the rights to use, copy, modify, merge, publish, distribute,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, provided that the above copyright
+ * notice(s) and this permission notice appear in all copies of the Soft-
+ * ware and that both the above copyright notice(s) and this permission
+ * notice appear in supporting documentation.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
+ * ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY
+ * RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN
+ * THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSE-
+ * QUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFOR-
+ * MANCE OF THIS SOFTWARE.
+ *
+ * Except as contained in this notice, the name of a copyright holder shall
+ * not be used in advertising or otherwise to promote the sale, use or
+ * other dealings in this Software without prior written authorization of
+ * the copyright holder.
+ *
+ * Authors:
+ * Kristian Høgsberg (krh at redhat.com)
+ */
+#ifndef _DRI2_TOKENS_H_
+#define _DRI2_TOKENS_H_
+
+#define DRI2BufferFrontLeft 0
+#define DRI2BufferBackLeft 1
+#define DRI2BufferFrontRight 2
+#define DRI2BufferBackRight 3
+#define DRI2BufferDepth 4
+#define DRI2BufferStencil 5
+#define DRI2BufferAccum 6
+#define DRI2BufferFakeFrontLeft 7
+#define DRI2BufferFakeFrontRight 8
+
+#define DRI2DriverDRI 0
+
+#endif
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
new file mode 100644
index 0000000..96409cb
--- /dev/null
+++ b/utests/CMakeLists.txt
@@ -0,0 +1,65 @@
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}
+ ${CMAKE_CURRENT_SOURCE_DIR}/../include)
+
+link_directories (${LLVM_LIBRARY_DIRS})
+ADD_LIBRARY(utests SHARED
+ utest_error.c
+ compiler_shader_toy.cpp
+ compiler_mandelbrot.cpp
+ compiler_mandelbrot_alternate.cpp
+ compiler_box_blur_float.cpp
+ compiler_box_blur.cpp
+ compiler_insert_to_constant.cpp
+ compiler_argument_structure.cpp
+ compiler_array0.cpp
+ compiler_array.cpp
+ compiler_array1.cpp
+ compiler_array2.cpp
+ compiler_array3.cpp
+ compiler_byte_scatter.cpp
+ compiler_copy_buffer.cpp
+ compiler_copy_buffer_row.cpp
+ compiler_function_argument0.cpp
+ compiler_function_argument1.cpp
+ compiler_function_argument.cpp
+ compiler_if_else.cpp
+ compiler_lower_return0.cpp
+ compiler_lower_return1.cpp
+ compiler_lower_return2.cpp
+ compiler_short_scatter.cpp
+ compiler_sub_bytes.cpp
+ compiler_sub_shorts.cpp
+ compiler_uint2_copy.cpp
+ compiler_uint3_copy.cpp
+ compiler_uint8_copy.cpp
+ compiler_uint16_copy.cpp
+ compiler_uint3_unaligned_copy.cpp
+ compiler_unstructured_branch0.cpp
+ compiler_unstructured_branch1.cpp
+ compiler_unstructured_branch2.cpp
+ compiler_unstructured_branch3.cpp
+ compiler_write_only_bytes.cpp
+ compiler_write_only.cpp
+ compiler_write_only_shorts.cpp
+ compiler_switch.cpp
+ compiler_math.cpp
+ compiler_insn_selection_min.cpp
+ compiler_insn_selection_max.cpp
+ compiler_insn_selection_masked_min_max.cpp
+ compiler_local_memory.cpp
+ compiler_local_memory_two_ptr.cpp
+ compiler_local_memory_barrier.cpp
+ compiler_local_memory_barrier_wg64.cpp
+ utest_assert.cpp
+ utest.cpp
+ utest_file_map.cpp
+ utest_helper.cpp)
+
+TARGET_LINK_LIBRARIES(utests cl m)
+
+ADD_EXECUTABLE(utest_run utest_run.cpp)
+TARGET_LINK_LIBRARIES(utest_run utests)
+
+ADD_EXECUTABLE(flat_address_space runtime_flat_address_space.cpp)
+TARGET_LINK_LIBRARIES(flat_address_space utests)
+
diff --git a/utests/Makefile b/utests/Makefile
new file mode 100644
index 0000000..c221398
--- /dev/null
+++ b/utests/Makefile
@@ -0,0 +1,78 @@
+TOP=..
+
+DIR_CXXFLAGS=-fexceptions
+
+include ../Makefile.defs
+
+SUBDIRS=.
+
+C_SRC=utest_error.c
+
+CPP_SRC=\
+ compiler_shader_toy.cpp \
+ compiler_mandelbrot.cpp \
+ compiler_mandelbrot_alternate.cpp \
+ compiler_box_blur.cpp \
+ compiler_box_blur_float.cpp \
+ compiler_insert_to_constant.cpp \
+ compiler_argument_structure.cpp \
+ compiler_array0.cpp \
+ compiler_array.cpp \
+ compiler_array1.cpp \
+ compiler_array2.cpp \
+ compiler_array3.cpp \
+ compiler_byte_scatter.cpp \
+ compiler_copy_buffer.cpp \
+ compiler_copy_buffer_row.cpp \
+ compiler_function_argument0.cpp \
+ compiler_function_argument1.cpp \
+ compiler_function_argument.cpp \
+ compiler_if_else.cpp \
+ compiler_lower_return0.cpp \
+ compiler_lower_return1.cpp \
+ compiler_lower_return2.cpp \
+ compiler_short_scatter.cpp \
+ compiler_sub_bytes.cpp \
+ compiler_sub_shorts.cpp \
+ compiler_uint2_copy.cpp \
+ compiler_uint3_copy.cpp \
+ compiler_uint3_unaligned_copy.cpp \
+ compiler_unstructured_branch0.cpp \
+ compiler_unstructured_branch1.cpp \
+ compiler_unstructured_branch2.cpp \
+ compiler_unstructured_branch3.cpp \
+ compiler_write_only_bytes.cpp \
+ compiler_write_only.cpp \
+ compiler_write_only_shorts.cpp \
+ compiler_switch.cpp \
+ compiler_math.cpp \
+ compiler_insn_selection_min.cpp \
+ compiler_insn_selection_max.cpp \
+ compiler_insn_selection_masked_min_max.cpp \
+ compiler_local_memory.cpp \
+ compiler_local_memory_two_ptr.cpp \
+ compiler_local_memory_barrier.cpp \
+ compiler_local_memory_barrier_wg64.cpp \
+ utest_assert.cpp \
+ utest.cpp \
+ utest_file_map.cpp \
+ utest_helper.cpp
+
+# unsupported for now
+# compiler_local_slm.cpp
+# compiler_argument_structure_indirect.cpp
+
+OBJ=$(C_SRC:.c=.o) $(CPP_SRC:.cpp=.o)
+
+all: utest_run runtime_flat_address_space
+
+utest_run: $(OBJ) utest_run.o $(TOP)/$(LIBBASE)
+ $(CXX) -o $@ $(OBJ) utest_run.o $(TOP)/$(LIBBASE)
+
+runtime_flat_address_space: $(OBJ) runtime_flat_address_space.o
+ $(CXX) -o $@ $(OBJ) runtime_flat_address_space.o $(TOP)/$(LIBBASE)
+
+clean:
+ rm -f $(OBJ)
+ rm -f utest_run utest_run.o
+ rm -f runtime_flat_address_space runtime_flat_address_space.o
diff --git a/utests/compiler_argument_structure.cpp b/utests/compiler_argument_structure.cpp
new file mode 100644
index 0000000..22464a5
--- /dev/null
+++ b/utests/compiler_argument_structure.cpp
@@ -0,0 +1,28 @@
+#include "utest_helper.hpp"
+
+struct hop { int x, y; };
+
+void compiler_argument_structure(void)
+{
+ const size_t n = 2048;
+ hop h = {3, 4};
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_argument_structure");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(hop), &h);
+
+ // Run the kernel
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+
+ // Check results
+ for (uint32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((uint32_t*)buf_data[0])[i] == 7);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_argument_structure);
+
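Editor's note: the OpenCL source loaded by OCL_CREATE_KERNEL is not part of this hunk. A kernel consistent with the host-side check above (every element must equal h.x + h.y = 7) could look like the following sketch; it is an assumption for illustration, not the kernel actually shipped in the tree.

/* Hypothetical kernel matching the compiler_argument_structure test. */
struct hop { int x, y; };

__kernel void compiler_argument_structure(__global uint *dst, struct hop h)
{
  const int id = get_global_id(0);
  dst[id] = h.x + h.y;  /* 3 + 4 == 7, as asserted by the host code */
}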
diff --git a/utests/compiler_argument_structure_indirect.cpp b/utests/compiler_argument_structure_indirect.cpp
new file mode 100644
index 0000000..a4584d5
--- /dev/null
+++ b/utests/compiler_argument_structure_indirect.cpp
@@ -0,0 +1,29 @@
+#include "utest_helper.hpp"
+
+struct hop { int x[16]; };
+
+void compiler_argument_structure_indirect(void)
+{
+ const size_t n = 2048;
+ hop h;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_argument_structure_indirect");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ for (int i = 0; i < 16; ++i) h.x[i] = i;
+ OCL_SET_ARG(1, sizeof(hop), &h);
+
+ // Run the kernel
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+
+ // Check results
+ for (uint32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((uint32_t*)buf_data[0])[i] == 7);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_argument_structure_indirect);
+
diff --git a/utests/compiler_array.cpp b/utests/compiler_array.cpp
new file mode 100644
index 0000000..8806c99
--- /dev/null
+++ b/utests/compiler_array.cpp
@@ -0,0 +1,28 @@
+#include "utest_helper.hpp"
+
+void compiler_array(void)
+{
+ const size_t n = 16;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_array");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+
+ // First control flow
+ OCL_MAP_BUFFER(0);
+ for (uint32_t i = 0; i < n; ++i) ((int32_t*)buf_data[0])[i] = -2;
+ OCL_UNMAP_BUFFER(0);
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < 16; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == 3);
+ OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_array);
+
diff --git a/utests/compiler_array0.cpp b/utests/compiler_array0.cpp
new file mode 100644
index 0000000..7cf2bbb
--- /dev/null
+++ b/utests/compiler_array0.cpp
@@ -0,0 +1,54 @@
+#include "utest_helper.hpp"
+
+static void cpu(int global_id, int *src, int *dst) {
+ int i;
+ int final[16];
+ for (i = 0; i < 16; ++i) {
+ int array[16], j;
+ for (j = 0; j < 16; ++j)
+ array[j] = global_id;
+ for (j = 0; j < src[0]; ++j)
+ array[j] = 1+src[j];
+ final[i] = array[i];
+ }
+ dst[global_id] = final[global_id];
+}
+
+void compiler_array0(void)
+{
+ const size_t n = 16;
+ int cpu_dst[16], cpu_src[16];
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_array0");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = 16;
+ locals[0] = 16;
+
+ // Run random tests
+ for (uint32_t pass = 0; pass < 8; ++pass) {
+ OCL_MAP_BUFFER(0);
+ for (int32_t i = 0; i < (int32_t) n; ++i)
+ cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16;
+ OCL_UNMAP_BUFFER(0);
+
+ // Run the kernel on GPU
+ OCL_NDRANGE(1);
+
+ // Run on CPU
+ for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, cpu_dst);
+
+ // Compare
+ OCL_MAP_BUFFER(1);
+ for (int32_t i = 0; i < 11; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]);
+ OCL_UNMAP_BUFFER(1);
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_array0);
+
+
diff --git a/utests/compiler_array1.cpp b/utests/compiler_array1.cpp
new file mode 100644
index 0000000..fe1ecec
--- /dev/null
+++ b/utests/compiler_array1.cpp
@@ -0,0 +1,52 @@
+#include "utest_helper.hpp"
+
+static void cpu(int global_id, int *src, int *dst) {
+ int final[16];
+ for (int i = 0; i < 16; ++i) {
+ int array[16];
+ for (int j = 0; j < src[0]; ++j)
+ array[j] = 1+src[0];
+ for (int j = src[0]; j < 16; ++j)
+ array[j] = global_id;
+ final[i] = array[i];
+ }
+ dst[global_id] = final[global_id];
+}
+
+void compiler_array1(void)
+{
+ const size_t n = 16;
+ int cpu_dst[16], cpu_src[16];
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_array1");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = 16;
+ locals[0] = 16;
+
+ // Run random tests
+ for (uint32_t pass = 0; pass < 8; ++pass) {
+ OCL_MAP_BUFFER(0);
+ for (int32_t i = 0; i < (int32_t) n; ++i)
+ cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16;
+ OCL_UNMAP_BUFFER(0);
+
+ // Run the kernel on GPU
+ OCL_NDRANGE(1);
+
+ // Run on CPU
+ for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, cpu_dst);
+
+ // Compare
+ OCL_MAP_BUFFER(1);
+ for (int32_t i = 0; i < 11; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]);
+ OCL_UNMAP_BUFFER(1);
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_array1);
+
diff --git a/utests/compiler_array2.cpp b/utests/compiler_array2.cpp
new file mode 100644
index 0000000..61ca9da
--- /dev/null
+++ b/utests/compiler_array2.cpp
@@ -0,0 +1,50 @@
+#include "utest_helper.hpp"
+
+static void cpu(int global_id, int *src, int *dst) {
+ int final[16];
+ int array[16];
+ for (int j = 0; j < 16; ++j) array[j] = j;
+ for (int j = 0; j < 16; ++j) final[j] = j+1;
+ if (global_id == 15)
+ dst[global_id] = final[global_id];
+ else
+ dst[global_id] = array[15 - global_id];
+}
+
+void compiler_array2(void)
+{
+ const size_t n = 16;
+ int cpu_dst[16], cpu_src[16];
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_array2");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = 16;
+ locals[0] = 16;
+
+ // Run random tests
+ for (uint32_t pass = 0; pass < 8; ++pass) {
+ OCL_MAP_BUFFER(0);
+ for (int32_t i = 0; i < (int32_t) n; ++i)
+ cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16;
+ OCL_UNMAP_BUFFER(0);
+
+ // Run the kernel on GPU
+ OCL_NDRANGE(1);
+
+ // Run on CPU
+ for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, cpu_dst);
+
+ // Compare
+ OCL_MAP_BUFFER(1);
+ for (int32_t i = 0; i < 11; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]);
+ OCL_UNMAP_BUFFER(1);
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_array2);
+
diff --git a/utests/compiler_array3.cpp b/utests/compiler_array3.cpp
new file mode 100644
index 0000000..865b1e5
--- /dev/null
+++ b/utests/compiler_array3.cpp
@@ -0,0 +1,51 @@
+#include "utest_helper.hpp"
+
+static void cpu(int global_id, int *src, int *dst) {
+ int tmp[32];
+ for (int i = 0; i < 16; ++i) {
+ for (int j = 0; j < 16; ++j)
+ tmp[j] = global_id;
+ for (int j = 0; j < src[0]; ++j)
+ tmp[j] = 1+src[j];
+ tmp[16+i] = tmp[i];
+ }
+ dst[global_id] = tmp[16+global_id];
+}
+
+void compiler_array3(void)
+{
+ const size_t n = 16;
+ int cpu_dst[16], cpu_src[16];
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_array3");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = 16;
+ locals[0] = 16;
+
+ // Run random tests
+ for (uint32_t pass = 0; pass < 8; ++pass) {
+ OCL_MAP_BUFFER(0);
+ for (int32_t i = 0; i < (int32_t) n; ++i)
+ cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16;
+ OCL_UNMAP_BUFFER(0);
+
+ // Run the kernel on GPU
+ OCL_NDRANGE(1);
+
+ // Run on CPU
+ for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, cpu_dst);
+
+ // Compare
+ OCL_MAP_BUFFER(1);
+ for (int32_t i = 0; i < 11; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]);
+ OCL_UNMAP_BUFFER(1);
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_array3);
+
diff --git a/utests/compiler_box_blur.cpp b/utests/compiler_box_blur.cpp
new file mode 100644
index 0000000..e4e053e
--- /dev/null
+++ b/utests/compiler_box_blur.cpp
@@ -0,0 +1,43 @@
+#include "utest_helper.hpp"
+#include <cmath>
+
+static int w = 0;
+static int h = 0;
+static int sz = 0;
+static const int chunk = 64; /* passed to the kernel with sizeof(int), so keep it an int */
+static int *src = NULL, *dst = NULL;
+
+static void compiler_box_blur()
+{
+ OCL_CREATE_KERNEL("compiler_box_blur");
+
+ /* Load the picture */
+ src = cl_read_bmp("lenna128x128.bmp", &w, &h);
+ sz = w * h * sizeof(int);
+
+ /* Run the kernel */
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, sz, src);
+ OCL_CREATE_BUFFER(buf[1], 0, sz, NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(2, sizeof(int), &w);
+ OCL_SET_ARG(3, sizeof(int), &h);
+ OCL_SET_ARG(4, sizeof(int), &chunk);
+ globals[0] = size_t(w/4);
+ globals[1] = h/chunk + ((h%chunk)?1:0);
+ locals[0] = 16;
+ locals[1] = 1;
+ free(src);
+ OCL_NDRANGE(2);
+ OCL_MAP_BUFFER(1);
+ dst = (int*) buf_data[1];
+
+ /* Save the image (for debug purpose) */
+ cl_write_bmp(dst, w, h, "compiler_box_blur.bmp");
+
+ /* Compare with the golden image */
+ OCL_CHECK_IMAGE(dst, w, h, "compiler_box_blur_ref.bmp");
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_box_blur);
+
diff --git a/utests/compiler_box_blur_float.cpp b/utests/compiler_box_blur_float.cpp
new file mode 100644
index 0000000..a3c97bc
--- /dev/null
+++ b/utests/compiler_box_blur_float.cpp
@@ -0,0 +1,65 @@
+#include "utest_helper.hpp"
+#include <cmath>
+
+static int *tmp = NULL;
+static struct float4 {float x,y,z,w;} *src = NULL, *dst = NULL;
+static int w = 0;
+static int h = 0;
+static int sz = 0;
+static const int chunk = 64; /* passed to the kernel with sizeof(int), so keep it an int */
+
+static void compiler_box_blur_float()
+{
+ OCL_CREATE_KERNEL("compiler_box_blur_float");
+
+ /* Load the picture */
+ tmp = cl_read_bmp("lenna128x128.bmp", &w, &h);
+ sz = w * h * sizeof(float[4]);
+ src = (float4*)malloc(sz);
+
+ /* RGBA -> float4 conversion */
+ const int n = w*h;
+ for (int i = 0; i < n; ++i) {
+ src[i].x = (float) (tmp[i] & 0xff);
+ src[i].y = (float) ((tmp[i] >> 8) & 0xff);
+ src[i].z = (float) ((tmp[i] >> 16) & 0xff);
+ src[i].w = 0.f;
+ }
+ free(tmp);
+
+ /* Run the kernel */
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, sz, src);
+ OCL_CREATE_BUFFER(buf[1], 0, sz, NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(2, sizeof(int), &w);
+ OCL_SET_ARG(3, sizeof(int), &h);
+ OCL_SET_ARG(4, sizeof(int), &chunk);
+ globals[0] = size_t(w);
+ globals[1] = h/chunk + ((h%chunk)?1:0);
+ locals[0] = 16;
+ locals[1] = 1;
+ free(src);
+ OCL_NDRANGE(2);
+ OCL_MAP_BUFFER(1);
+ dst = (float4*) buf_data[1];
+
+ /* Convert back to RGBA and save */
+ int *tmp = (int*) malloc(n*sizeof(int));
+ for (int i = 0; i < n; ++i) {
+ int to = int(std::min(dst[i].x, 255.f));
+ to |= int(std::min(dst[i].y, 255.f)) << 8;
+ to |= int(std::min(dst[i].z, 255.f)) << 16;
+ tmp[i] = to;
+ }
+
+ /* Save the image (for debug purpose) */
+ cl_write_bmp(tmp, w, h, "compiler_box_blur_float.bmp");
+
+ /* Compare with the golden image */
+ OCL_CHECK_IMAGE(tmp, w, h, "compiler_box_blur_float_ref.bmp");
+ free(tmp);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_box_blur_float);
+
diff --git a/utests/compiler_byte_scatter.cpp b/utests/compiler_byte_scatter.cpp
new file mode 100644
index 0000000..11300da
--- /dev/null
+++ b/utests/compiler_byte_scatter.cpp
@@ -0,0 +1,24 @@
+#include "utest_helper.hpp"
+
+static void compiler_byte_scatter(void)
+{
+ const size_t n = 128;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_byte_scatter");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int8_t), NULL);
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ // Check result
+ OCL_MAP_BUFFER(0);
+ for (int32_t i = 0; i < (int32_t) n; ++i)
+ OCL_ASSERT(((int8_t*)buf_data[0])[i] == (int8_t) i);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_byte_scatter);
+
diff --git a/utests/compiler_copy_buffer.cpp b/utests/compiler_copy_buffer.cpp
new file mode 100644
index 0000000..8066efe
--- /dev/null
+++ b/utests/compiler_copy_buffer.cpp
@@ -0,0 +1,32 @@
+#include "utest_helper.hpp"
+
+static void compiler_copy_buffer(void)
+{
+ const size_t n = 8192 * 4;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("test_copy_buffer");
+ //OCL_CREATE_KERNEL("compiler_array");
+ buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n);
+ for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = i;
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ free(buf_data[0]);
+ buf_data[0] = NULL;
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ // Check result
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((uint32_t*)buf_data[0])[i] == ((uint32_t*)buf_data[1])[i]);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_copy_buffer);
+
diff --git a/utests/compiler_copy_buffer_row.cpp b/utests/compiler_copy_buffer_row.cpp
new file mode 100644
index 0000000..12c0592
--- /dev/null
+++ b/utests/compiler_copy_buffer_row.cpp
@@ -0,0 +1,40 @@
+#include "utest_helper.hpp"
+
+static void compiler_copy_buffer_row(void)
+{
+ uint32_t *src_buffer = NULL;
+ int *data_buffer = NULL;
+ const int row = 8192;
+ const int row_n = 2;
+ const int n = row * row_n;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("test_copy_buffer_row");
+ src_buffer = (uint32_t *) malloc(sizeof(uint32_t) * n);
+ for (int32_t i = 0; i < n; ++i) src_buffer[i] = i;
+ data_buffer = (int *) malloc(sizeof(int) * 2);
+ data_buffer[0] = row;
+ data_buffer[1] = n;
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), src_buffer);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ OCL_CREATE_BUFFER(buf[2], CL_MEM_COPY_HOST_PTR, 2 * sizeof(uint32_t), data_buffer);
+ free(src_buffer);
+ free(data_buffer);
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ // Check results
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (int32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((uint32_t*)buf_data[0])[i] == ((uint32_t*)buf_data[1])[i]);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_copy_buffer_row);
+
diff --git a/utests/compiler_function_argument.cpp b/utests/compiler_function_argument.cpp
new file mode 100644
index 0000000..a39523b
--- /dev/null
+++ b/utests/compiler_function_argument.cpp
@@ -0,0 +1,27 @@
+#include "utest_helper.hpp"
+
+void compiler_function_argument(void)
+{
+ const size_t n = 2048;
+ const int value = 34;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_function_argument");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(int), &value);
+
+ // Run the kernel
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+
+ // Check results
+ for (uint32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((int*)buf_data[0])[i] == value);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_function_argument);
+
+
diff --git a/utests/compiler_function_argument0.cpp b/utests/compiler_function_argument0.cpp
new file mode 100644
index 0000000..2e4227e
--- /dev/null
+++ b/utests/compiler_function_argument0.cpp
@@ -0,0 +1,26 @@
+#include "utest_helper.hpp"
+
+void compiler_function_argument0(void)
+{
+ const size_t n = 2048;
+ const short value = 34;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_function_argument0");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(short), &value);
+
+ // Run the kernel
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+
+ // Check results
+ for (uint32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((int*)buf_data[0])[i] == value);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_function_argument0);
+
diff --git a/utests/compiler_function_argument1.cpp b/utests/compiler_function_argument1.cpp
new file mode 100644
index 0000000..48a7677
--- /dev/null
+++ b/utests/compiler_function_argument1.cpp
@@ -0,0 +1,31 @@
+#include "utest_helper.hpp"
+
+void compiler_function_argument1(void)
+{
+ const size_t n = 2048;
+ const char value = 34;
+ const short value0 = 31;
+ const int value1 = 3;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_function_argument1");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(char), &value);
+ OCL_SET_ARG(2, sizeof(short), &value0);
+ OCL_SET_ARG(3, sizeof(int), &value1);
+
+ // Run the kernel
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+
+ // Check results
+ for (uint32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((int*)buf_data[0])[i] == value + value0 + value1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_function_argument1);
+
+
diff --git a/utests/compiler_if_else.cpp b/utests/compiler_if_else.cpp
new file mode 100644
index 0000000..e38b23f
--- /dev/null
+++ b/utests/compiler_if_else.cpp
@@ -0,0 +1,64 @@
+#include "utest_helper.hpp"
+
+static void compiler_if_else(void)
+{
+ const size_t n = 17;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_if_else");
+ buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n);
+ for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = 2;
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ free(buf_data[0]);
+ buf_data[0] = NULL;
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = 16;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ // First control flow
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < 16; ++i) {
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2);
+ OCL_ASSERT(((int32_t*)buf_data[0])[i] == 1);
+ }
+
+ // Second control flow
+ for (uint32_t i = 0; i < n; ++i) ((int32_t*)buf_data[0])[i] = -1;
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < 16; ++i) {
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == -2);
+ OCL_ASSERT(((int32_t*)buf_data[0])[i] == 2);
+ }
+
+ // Third control flow
+ for (uint32_t i = 0; i < 4; ++i) ((int32_t*)buf_data[0])[i] = 2;
+ for (uint32_t i = 4; i < n; ++i) ((int32_t*)buf_data[0])[i] = -1;
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < 3; ++i) {
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2);
+ OCL_ASSERT(((int32_t*)buf_data[0])[i] == 1);
+ }
+ OCL_ASSERT(((int32_t*)buf_data[1])[3] == -1);
+ OCL_ASSERT(((int32_t*)buf_data[0])[3] == 1);
+ for (uint32_t i = 4; i < 16; ++i) {
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == -2);
+ OCL_ASSERT(((int32_t*)buf_data[0])[i] == 2);
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_if_else);
+
diff --git a/utests/compiler_insert_to_constant.cpp b/utests/compiler_insert_to_constant.cpp
new file mode 100644
index 0000000..c4f737f
--- /dev/null
+++ b/utests/compiler_insert_to_constant.cpp
@@ -0,0 +1,30 @@
+#include "utest_helper.hpp"
+
+void compiler_insert_to_constant(void)
+{
+ const size_t n = 32;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_insert_to_constant");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t[4]), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+
+ // Run the kernel
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+
+ // Check results
+ uint32_t *data = (uint32_t*) buf_data[0];
+ for (uint32_t i = 0; i < n; ++i) {
+ OCL_ASSERT(data[4*i+0] == 0);
+ OCL_ASSERT(data[4*i+1] == 1);
+ OCL_ASSERT(data[4*i+2] == i);
+ OCL_ASSERT(data[4*i+3] == 3);
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_insert_to_constant);
+
+
diff --git a/utests/compiler_insn_selection_masked_min_max.cpp b/utests/compiler_insn_selection_masked_min_max.cpp
new file mode 100644
index 0000000..6a2edcc
--- /dev/null
+++ b/utests/compiler_insn_selection_masked_min_max.cpp
@@ -0,0 +1,42 @@
+#include "utest_helper.hpp"
+#include <algorithm>
+
+static void compiler_insn_selection_masked_min_max(void)
+{
+ const size_t n = 256;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_insn_selection_masked_min_max");
+ buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n);
+ for (uint32_t i = 0; i < n; ++i)
+ ((float*)buf_data[0])[i] = float(i);
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ free(buf_data[0]);
+ buf_data[0] = NULL;
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ // Check result
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ float *dst = (float*)buf_data[1];
+ float *src = (float*)buf_data[0];
+ for (uint32_t i = 0; i < n; ++i) {
+ float cpu_dst;
+ if (i % 16 > 5)
+ cpu_dst = std::max(src[i], src[7]);
+ else
+ cpu_dst = std::min(src[i], src[10]);
+ OCL_ASSERT(dst[i] == cpu_dst);
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_insn_selection_masked_min_max)
+
+
diff --git a/utests/compiler_insn_selection_max.cpp b/utests/compiler_insn_selection_max.cpp
new file mode 100644
index 0000000..8552b9f
--- /dev/null
+++ b/utests/compiler_insn_selection_max.cpp
@@ -0,0 +1,37 @@
+#include "utest_helper.hpp"
+#include <algorithm>
+
+static void compiler_insn_selection_max(void)
+{
+ const size_t n = 8192 * 4;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_insn_selection_max");
+ buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n);
+ for (uint32_t i = 0; i < n; ++i)
+ ((float*)buf_data[0])[i] = float(i);
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ free(buf_data[0]);
+ buf_data[0] = NULL;
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ // Check result
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ float *dst = (float*)buf_data[1];
+ float *src = (float*)buf_data[0];
+ for (uint32_t i = 0; i < n; ++i) {
+ OCL_ASSERT(dst[i] == std::max(src[i], src[0]));
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_insn_selection_max)
+
+
diff --git a/utests/compiler_insn_selection_min.cpp b/utests/compiler_insn_selection_min.cpp
new file mode 100644
index 0000000..f5f9d18
--- /dev/null
+++ b/utests/compiler_insn_selection_min.cpp
@@ -0,0 +1,36 @@
+#include "utest_helper.hpp"
+#include <algorithm>
+
+static void compiler_insn_selection_min(void)
+{
+ const size_t n = 8192 * 4;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_insn_selection_min");
+ buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n);
+ for (uint32_t i = 0; i < n; ++i)
+ ((float*)buf_data[0])[i] = float(i);
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ free(buf_data[0]);
+ buf_data[0] = NULL;
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ // Check result
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ float *dst = (float*)buf_data[1];
+ float *src = (float*)buf_data[0];
+ for (uint32_t i = 0; i < n; ++i) {
+ OCL_ASSERT(dst[i] == std::min(src[i], src[0]));
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_insn_selection_min)
+
diff --git a/utests/compiler_local_memory.cpp b/utests/compiler_local_memory.cpp
new file mode 100644
index 0000000..49fa28c
--- /dev/null
+++ b/utests/compiler_local_memory.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "utest_helper.hpp"
+
+static void compiler_local_memory(void)
+{
+ const size_t n = 1024;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_local_memory");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, 64, NULL); // 16 x int
+
+ // Run the kernel
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+
+ // Check results
+ uint32_t *dst = (uint32_t*)buf_data[0];
+ for (uint32_t i = 0; i < n; i+=16)
+ for (uint32_t j = 0; j < 16; ++j)
+ OCL_ASSERT(dst[i+j] == 15-j);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_local_memory);
+
+
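[Editorial sketch] The line OCL_SET_ARG(1, 64, NULL) above relies on the clSetKernelArg convention for __local kernel arguments: a NULL arg_value with a non-zero size reserves that many bytes of local memory per work-group. A minimal sketch, assuming a cl_kernel `k` whose second parameter is declared "__local int *" (the function name is illustrative only):

    #include <CL/cl.h>

    // Reserve 16 ints of __local scratch per work-group for argument 1,
    // which is what OCL_SET_ARG(1, 64, NULL) boils down to.
    static cl_int reserve_local_scratch(cl_kernel k)
    {
      return clSetKernelArg(k, 1, 16 * sizeof(cl_int), NULL); // 16 x int
    }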
diff --git a/utests/compiler_local_memory_barrier.cpp b/utests/compiler_local_memory_barrier.cpp
new file mode 100644
index 0000000..6c9c98e
--- /dev/null
+++ b/utests/compiler_local_memory_barrier.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "utest_helper.hpp"
+
+static void compiler_local_memory_barrier(void)
+{
+ const size_t n = 1024;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_local_memory_barrier");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, 64, NULL); // 16 x int
+
+ // Run the kernel
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+
+ // Check results
+ uint32_t *dst = (uint32_t*)buf_data[0];
+ for (uint32_t i = 0; i < n; i+=16)
+ for (uint32_t j = 0; j < 16; ++j)
+ OCL_ASSERT(dst[i+j] == 15-j);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_local_memory_barrier);
+
diff --git a/utests/compiler_local_memory_barrier_wg64.cpp b/utests/compiler_local_memory_barrier_wg64.cpp
new file mode 100644
index 0000000..0cb69f5
--- /dev/null
+++ b/utests/compiler_local_memory_barrier_wg64.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "utest_helper.hpp"
+
+static void compiler_local_memory_barrier_wg64(void)
+{
+ const size_t n = 1024;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_local_memory_barrier_wg64");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, 256, NULL); // 64 x int
+
+ // Run the kernel
+ globals[0] = n;
+ locals[0] = 64;
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+
+ // Check results
+ uint32_t *dst = (uint32_t*)buf_data[0];
+ for (uint32_t i = 0; i < n; i+=64)
+ for (uint32_t j = 0; j < 64; ++j)
+ OCL_ASSERT(dst[i+j] == 63-j);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_local_memory_barrier_wg64);
+
diff --git a/utests/compiler_local_memory_two_ptr.cpp b/utests/compiler_local_memory_two_ptr.cpp
new file mode 100644
index 0000000..fde5533
--- /dev/null
+++ b/utests/compiler_local_memory_two_ptr.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "utest_helper.hpp"
+
+static void compiler_local_memory_two_ptr(void)
+{
+ const size_t n = 1024;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_local_memory_two_ptr");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, 64, NULL); // 16 x int
+ OCL_SET_ARG(2, 64, NULL); // 16 x int
+
+ // Run the kernel
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+
+ // Check results
+ int32_t *dst = (int32_t*)buf_data[0];
+ for (int32_t i = 0; i < (int) n; i+=16)
+ for (int32_t j = 0; j < 16; ++j) {
+ const int gid = i + j;
+ const int tid = j;
+ OCL_ASSERT(dst[i+j] == (gid&~0xf) + 15-tid + 15-tid);
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_local_memory_two_ptr);
+
diff --git a/utests/compiler_local_slm.cpp b/utests/compiler_local_slm.cpp
new file mode 100644
index 0000000..aa9a2fe
--- /dev/null
+++ b/utests/compiler_local_slm.cpp
@@ -0,0 +1,10 @@
+#include "utest_helper.hpp"
+
+void compiler_local_slm(void)
+{
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_local_slm");
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_local_slm);
+
diff --git a/utests/compiler_lower_return0.cpp b/utests/compiler_lower_return0.cpp
new file mode 100644
index 0000000..0e9dbd0
--- /dev/null
+++ b/utests/compiler_lower_return0.cpp
@@ -0,0 +1,54 @@
+#include "utest_helper.hpp"
+
+static void compiler_lower_return0(void)
+{
+ const size_t n = 32;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_lower_return0");
+ buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n);
+ for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = 2;
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ free(buf_data[0]);
+ buf_data[0] = NULL;
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ // First control flow
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (int32_t i = 0; i < 32; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == i);
+
+ // Second control flow
+ for (uint32_t i = 0; i < n; ++i) ((int32_t*)buf_data[0])[i] = -2;
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < 32; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == -2);
+
+ // Third control flow
+ for (uint32_t i = 0; i < 8; ++i) ((int32_t*)buf_data[0])[i] = 2;
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (int32_t i = 0; i < 8; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == i);
+ for (int32_t i = 8; i < 32; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == -2);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_lower_return0);
+
+
diff --git a/utests/compiler_lower_return1.cpp b/utests/compiler_lower_return1.cpp
new file mode 100644
index 0000000..b4f1fe3
--- /dev/null
+++ b/utests/compiler_lower_return1.cpp
@@ -0,0 +1,47 @@
+#include "utest_helper.hpp"
+
+static void compiler_lower_return1(void)
+{
+ const size_t n = 32;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_lower_return1");
+ buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n);
+ for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = 2;
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ free(buf_data[0]);
+ buf_data[0] = NULL;
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = 16;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ // First control flow
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (int32_t i = 0; i < 11; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == i);
+ for (int32_t i = 11; i < 16; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2);
+
+ // Second control flow
+ for (uint32_t i = 0; i < 4; ++i) ((int32_t*)buf_data[0])[i] = -2;
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (int32_t i = 0; i < 4; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == -2);
+ for (int32_t i = 4; i < 11; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == i);
+ for (int32_t i = 11; i < 16; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_lower_return1);
+
diff --git a/utests/compiler_lower_return2.cpp b/utests/compiler_lower_return2.cpp
new file mode 100644
index 0000000..1e34036
--- /dev/null
+++ b/utests/compiler_lower_return2.cpp
@@ -0,0 +1,48 @@
+#include "utest_helper.hpp"
+
+static void cpu(int global_id, int *src, int *dst) {
+ const int id = global_id;
+ dst[id] = id;
+ while (dst[id] > src[id]) {
+ if (dst[id] > 10) return;
+ dst[id]--;
+ }
+ dst[id] += 2;
+}
+
+static void compiler_lower_return2(void)
+{
+ const size_t n = 16;
+ int cpu_dst[16], cpu_src[16];
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_lower_return2");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = 16;
+ locals[0] = 16;
+
+ for (uint32_t pass = 0; pass < 8; ++pass) {
+ OCL_MAP_BUFFER(0);
+ for (int32_t i = 0; i < (int32_t) n; ++i)
+ cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16;
+ OCL_UNMAP_BUFFER(0);
+
+ // Run the kernel on GPU
+ OCL_NDRANGE(1);
+
+ // Run on CPU
+ for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst);
+
+ // Compare
+ OCL_MAP_BUFFER(1);
+ for (int32_t i = 0; i < 11; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]);
+ OCL_UNMAP_BUFFER(1);
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_lower_return2);
+
diff --git a/utests/compiler_mandelbrot.cpp b/utests/compiler_mandelbrot.cpp
new file mode 100644
index 0000000..7758dae
--- /dev/null
+++ b/utests/compiler_mandelbrot.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "utest_helper.hpp"
+
+static int *dst = NULL;
+static const size_t w = 256;
+static const size_t h = 256;
+
+static void compiler_mandelbrot(void)
+{
+ const size_t global[2] = {w, h};
+ const size_t local[2] = {16, 1};
+ const size_t sz = w * h * sizeof(char[4]);
+
+ OCL_CREATE_KERNEL("compiler_mandelbrot");
+
+ OCL_CREATE_BUFFER(buf[0], 0, sz, NULL);
+ OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &buf[0]);
+ OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL, global, local, 0, NULL, NULL);
+ OCL_MAP_BUFFER(0);
+ dst = (int *) buf_data[0];
+
+ /* Save the image (for debug purposes) */
+ cl_write_bmp(dst, w, h, "compiler_mandelbrot.bmp");
+
+ /* Compare with the golden image */
+ OCL_CHECK_IMAGE(dst, w, h, "compiler_mandelbrot_ref.bmp");
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_mandelbrot);
+
diff --git a/utests/compiler_mandelbrot_alternate.cpp b/utests/compiler_mandelbrot_alternate.cpp
new file mode 100644
index 0000000..2e5d59f
--- /dev/null
+++ b/utests/compiler_mandelbrot_alternate.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "utest_helper.hpp"
+
+static int *dst = NULL;
+static const size_t w = 256;
+static const size_t h = 256;
+static const float criterium = 4.f;
+
+static void compiler_mandelbrot_alternate(void)
+{
+ const size_t global[2] = {w, h};
+ const size_t local[2] = {16, 1};
+ const size_t sz = w * h * sizeof(char[4]);
+ const float rcpWidth = 1.f / float(w);
+ const float rcpHeight = 1.f / float(h);
+
+ OCL_CREATE_KERNEL("compiler_mandelbrot_alternate");
+
+ OCL_CREATE_BUFFER(buf[0], 0, sz, NULL);
+ OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &buf[0]);
+ OCL_CALL (clSetKernelArg, kernel, 1, sizeof(float), &rcpWidth);
+ OCL_CALL (clSetKernelArg, kernel, 2, sizeof(float), &rcpHeight);
+ OCL_CALL (clSetKernelArg, kernel, 3, sizeof(float), &criterium);
+ OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL, global, local, 0, NULL, NULL);
+ OCL_MAP_BUFFER(0);
+ dst = (int *) buf_data[0];
+
+ /* Save the image (for debug purposes) */
+ cl_write_bmp(dst, w, h, "compiler_mandelbrot_alternate.bmp");
+
+ /* Compare with the golden image */
+ OCL_CHECK_IMAGE(dst, w, h, "compiler_mandelbrot_alternate_ref.bmp");
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_mandelbrot_alternate);
+
diff --git a/utests/compiler_math.cpp b/utests/compiler_math.cpp
new file mode 100644
index 0000000..7303dd5
--- /dev/null
+++ b/utests/compiler_math.cpp
@@ -0,0 +1,55 @@
+#include "utest_helper.hpp"
+#include <cmath>
+#include <algorithm>
+
+static void cpu_compiler_math(float *dst, float *src, int get_global_id0)
+{
+ const float x = src[get_global_id0];
+ switch (get_global_id0) {
+ case 0: dst[get_global_id0] = cosf(x); break;
+ case 1: dst[get_global_id0] = sinf(x); break;
+ case 2: dst[get_global_id0] = log2f(x); break;
+ case 3: dst[get_global_id0] = sqrtf(x); break;
+ case 4: dst[get_global_id0] = 1.f/ sqrtf(x); break;
+ case 5: dst[get_global_id0] = 1.f / x; break;
+ case 6: dst[get_global_id0] = tanf(x); break;
+ default: dst[get_global_id0] = 1.f; break;
+ };
+}
+
+static void compiler_math(void)
+{
+ const size_t n = 32;
+ float cpu_dst[32], cpu_src[32];
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_math");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = 16;
+ locals[0] = 16;
+
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < 32; ++i)
+ cpu_src[i] = ((float*)buf_data[1])[i] = float(i);
+ OCL_UNMAP_BUFFER(1);
+ OCL_NDRANGE(1);
+
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (int i = 0; i < 16; ++i)
+ cpu_compiler_math(cpu_dst, cpu_src, i);
+ for (int i = 0; i < 16; ++i) {
+ const float cpu = cpu_dst[i];
+ const float gpu = ((float*)buf_data[0])[i];
+ OCL_ASSERT(fabs(gpu-cpu)/std::max(fabs(cpu), fabs(gpu)) < 1e-4f);
+ }
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_math)
+
+
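[Editorial sketch] The acceptance check above compares GPU and CPU results by relative error with a 1e-4 bound. A stand-alone restatement of that criterion (a hypothetical helper, not part of utest_helper.hpp; the extra guard covers the exact-zero case, which the inputs chosen by the test already avoid):

    #include <algorithm>
    #include <cmath>

    // True when gpu and cpu agree to within rel_tol relative error.
    static bool nearly_equal(float gpu, float cpu, float rel_tol = 1e-4f)
    {
      const float scale = std::max(std::fabs(cpu), std::fabs(gpu));
      return scale == 0.f ? true : std::fabs(gpu - cpu) / scale < rel_tol;
    }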
diff --git a/utests/compiler_shader_toy.cpp b/utests/compiler_shader_toy.cpp
new file mode 100644
index 0000000..6c34003
--- /dev/null
+++ b/utests/compiler_shader_toy.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/* This is a super simple wrapper for the OpenCL kernels I ported from GLSL code
+ * taken in Inigo's web site:
+ * http://www.iquilezles.org/apps/shadertoy/index.html
+ *
+ * They are pretty cool and rather complex kernels. Just the right thing to have
+ * something a bit more complicated and interesting than unit tests.
+ *
+ * The code here is just to wrap the common code used by all the kernels (to run
+ * the code and assert its correctness)
+ */
+#include "utest_helper.hpp"
+
+static const int dim = 256;
+
+static void run_kernel(int w, int h, const char *name)
+{
+ const size_t global[2] = {size_t(w), size_t(h)};
+ const size_t local[2] = {16, 1};
+ const size_t sz = w * h * sizeof(char[4]);
+ const float fx = float(w);
+ const float fy = float(h);
+ char kernel_file[256];
+ char dst_img[256];
+ char ref_img[256];
+
+ snprintf(kernel_file, sizeof(kernel_file), "%s.cl", name);
+ snprintf(dst_img, sizeof(dst_img), "%s.bmp", name);
+ snprintf(ref_img, sizeof(ref_img), "%s_ref.bmp", name);
+ OCL_CALL (cl_kernel_init, kernel_file, name, SOURCE);
+
+ OCL_CREATE_BUFFER(buf[0], 0, sz, NULL);
+ OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &buf[0]);
+ OCL_CALL (clSetKernelArg, kernel, 1, sizeof(float), &fx);
+ OCL_CALL (clSetKernelArg, kernel, 2, sizeof(float), &fy);
+ OCL_CALL (clSetKernelArg, kernel, 3, sizeof(int), &w);
+ OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL, global, local, 0, NULL, NULL);
+ OCL_MAP_BUFFER(0);
+ int *dst = (int*) buf_data[0];
+
+ /* Save the image (for debug purposes) */
+ cl_write_bmp(dst, w, h, dst_img);
+
+ /* Compare with the golden image */
+ OCL_CHECK_IMAGE(dst, w, h, ref_img);
+}
+
+#define DECL_SHADER_TOY_TEST(W,H,NAME) \
+ static void NAME(void) { run_kernel(W,H,#NAME); } \
+ MAKE_UTEST_FROM_FUNCTION(NAME);
+
+DECL_SHADER_TOY_TEST(dim,dim,compiler_clod);
+DECL_SHADER_TOY_TEST(dim,dim,compiler_ribbon);
+DECL_SHADER_TOY_TEST(dim,dim,compiler_nautilus);
+DECL_SHADER_TOY_TEST(dim,dim,compiler_menger_sponge_no_shadow);
+DECL_SHADER_TOY_TEST(dim,dim,compiler_julia);
+DECL_SHADER_TOY_TEST(dim,dim,compiler_julia_no_break);
+
+// Still issues here for LLVM 3.2
+// DECL_SHADER_TOY_TEST(dim,dim,compiler_chocolux);
+// DECL_SHADER_TOY_TEST(dim,dim,compiler_menger_sponge);
+
+#undef DECL_SHADER_TOY_TEST
+
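[Editorial note] For reference, DECL_SHADER_TOY_TEST(dim,dim,compiler_clod) expands to roughly the following two declarations; the second line is expanded further by MAKE_UTEST_FROM_FUNCTION from utest.hpp, which appears later in this commit:

    static void compiler_clod(void) { run_kernel(dim, dim, "compiler_clod"); }
    MAKE_UTEST_FROM_FUNCTION(compiler_clod);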
diff --git a/utests/compiler_short_scatter.cpp b/utests/compiler_short_scatter.cpp
new file mode 100644
index 0000000..1746744
--- /dev/null
+++ b/utests/compiler_short_scatter.cpp
@@ -0,0 +1,25 @@
+#include "utest_helper.hpp"
+
+static void compiler_short_scatter(void)
+{
+ const size_t n = 128;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_short_scatter");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int16_t), NULL);
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ // Check result
+ OCL_MAP_BUFFER(0);
+ for (int32_t i = 0; i < (int32_t) n; ++i)
+ OCL_ASSERT(((int16_t*)buf_data[0])[i] == (int16_t) i);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_short_scatter);
+
+
diff --git a/utests/compiler_sub_bytes.cpp b/utests/compiler_sub_bytes.cpp
new file mode 100644
index 0000000..49a5261
--- /dev/null
+++ b/utests/compiler_sub_bytes.cpp
@@ -0,0 +1,35 @@
+#include "utest_helper.hpp"
+
+static void compiler_sub_bytes(void)
+{
+ const size_t n = 16;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_sub_bytes");
+ buf_data[0] = (int8_t*) malloc(sizeof(int8_t) * n);
+ buf_data[1] = (int8_t*) malloc(sizeof(int8_t) * n);
+ for (uint32_t i = 0; i < n; ++i) ((int8_t*)buf_data[0])[i] = (int8_t) rand();
+ for (uint32_t i = 0; i < n; ++i) ((int8_t*)buf_data[1])[i] = (int8_t) rand();
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(int8_t), buf_data[0]);
+ OCL_CREATE_BUFFER(buf[1], CL_MEM_COPY_HOST_PTR, n * sizeof(int8_t), buf_data[1]);
+ OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int8_t), NULL);
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ // Check result
+ OCL_MAP_BUFFER(2);
+ for (uint32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((int8_t*)buf_data[2])[i] == ((int8_t*)buf_data[0])[i] - ((int8_t*)buf_data[1])[i]);
+ free(buf_data[0]);
+ free(buf_data[1]);
+ buf_data[0] = buf_data[1] = NULL;
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_sub_bytes);
+
diff --git a/utests/compiler_sub_shorts.cpp b/utests/compiler_sub_shorts.cpp
new file mode 100644
index 0000000..4aeeca3
--- /dev/null
+++ b/utests/compiler_sub_shorts.cpp
@@ -0,0 +1,36 @@
+#include "utest_helper.hpp"
+
+static void compiler_sub_shorts(void)
+{
+ const size_t n = 16;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_sub_shorts");
+ buf_data[0] = (int16_t*) malloc(sizeof(int16_t) * n);
+ buf_data[1] = (int16_t*) malloc(sizeof(int16_t) * n);
+ for (uint32_t i = 0; i < n; ++i) ((int16_t*)buf_data[0])[i] = (int16_t) rand();
+ for (uint32_t i = 0; i < n; ++i) ((int16_t*)buf_data[1])[i] = (int16_t) rand();
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(int16_t), buf_data[0]);
+ OCL_CREATE_BUFFER(buf[1], CL_MEM_COPY_HOST_PTR, n * sizeof(int16_t), buf_data[1]);
+ OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int16_t), NULL);
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ // Check result
+ OCL_MAP_BUFFER(2);
+ for (uint32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((int16_t*)buf_data[2])[i] == ((int16_t*)buf_data[0])[i] - ((int16_t*)buf_data[1])[i]);
+ free(buf_data[0]);
+ free(buf_data[1]);
+ buf_data[0] = buf_data[1] = NULL;
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_sub_shorts);
+
+
diff --git a/utests/compiler_switch.cpp b/utests/compiler_switch.cpp
new file mode 100644
index 0000000..6e93309
--- /dev/null
+++ b/utests/compiler_switch.cpp
@@ -0,0 +1,48 @@
+#include "utest_helper.hpp"
+
+static void cpu_compiler_switch(int *dst, int *src, int get_global_id0)
+{
+ switch (get_global_id0) {
+ case 0: dst[get_global_id0] = src[get_global_id0 + 4]; break;
+ case 1: dst[get_global_id0] = src[get_global_id0 + 14]; break;
+ case 2: dst[get_global_id0] = src[get_global_id0 + 13]; break;
+ case 6: dst[get_global_id0] = src[get_global_id0 + 11]; break;
+ case 7: dst[get_global_id0] = src[get_global_id0 + 10]; break;
+ case 10: dst[get_global_id0] = src[get_global_id0 + 9]; break;
+ case 12: dst[get_global_id0] = src[get_global_id0 + 6]; break;
+ default: dst[get_global_id0] = src[get_global_id0 + 8]; break;
+ }
+}
+
+static void compiler_switch(void)
+{
+ const size_t n = 32;
+ int cpu_dst[32], cpu_src[32];
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_switch");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = 16;
+ locals[0] = 16;
+
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < 32; ++i)
+ cpu_src[i] = ((int32_t*)buf_data[1])[i] = i;
+ OCL_UNMAP_BUFFER(1);
+ OCL_NDRANGE(1);
+
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (int i = 0; i < 16; ++i)
+ cpu_compiler_switch(cpu_dst, cpu_src, i);
+ for (int i = 0; i < 16; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[0])[i] == cpu_dst[i]);
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_switch)
+
diff --git a/utests/compiler_uint16_copy.cpp b/utests/compiler_uint16_copy.cpp
new file mode 100644
index 0000000..1494e81
--- /dev/null
+++ b/utests/compiler_uint16_copy.cpp
@@ -0,0 +1,35 @@
+#include "utest_helper.hpp"
+
+static void compiler_uint16_copy(void)
+{
+ const size_t n = 128;
+
+ // Setup kernel and buffers. Note that a uint16 occupies 64 bytes and is
+ // aligned to its size according to the OCL specification
+ OCL_CREATE_KERNEL("compiler_uint16_copy");
+ buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t[16]) * n);
+ for (uint32_t i = 0; i < n; ++i)
+ for (uint32_t j = 0; j < 16; ++j)
+ ((uint32_t*)buf_data[0])[16*i+j] = 16*i+j;
+
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t[16]), buf_data[0]);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t[16]), NULL);
+ free(buf_data[0]);
+ buf_data[0] = NULL;
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ // Check result
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < 16*n; ++i)
+ OCL_ASSERT(((uint32_t*)buf_data[0])[i] == ((uint32_t*)buf_data[1])[i]);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_uint16_copy);
+
diff --git a/utests/compiler_uint2_copy.cpp b/utests/compiler_uint2_copy.cpp
new file mode 100644
index 0000000..8eb4314
--- /dev/null
+++ b/utests/compiler_uint2_copy.cpp
@@ -0,0 +1,31 @@
+#include "utest_helper.hpp"
+
+static void compiler_uint2_copy(void)
+{
+ const size_t n = 128;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_uint2_copy");
+ buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t[2]) * n);
+ for (uint32_t i = 0; i < 2*n; ++i) ((uint32_t*)buf_data[0])[i] = i;
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t[2]), buf_data[0]);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t[2]), NULL);
+ free(buf_data[0]);
+ buf_data[0] = NULL;
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ // Check result
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < 2*n; ++i)
+ OCL_ASSERT(((uint32_t*)buf_data[0])[i] == ((uint32_t*)buf_data[1])[i]);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_uint2_copy);
+
diff --git a/utests/compiler_uint3_copy.cpp b/utests/compiler_uint3_copy.cpp
new file mode 100644
index 0000000..c4d3cf0
--- /dev/null
+++ b/utests/compiler_uint3_copy.cpp
@@ -0,0 +1,40 @@
+#include "utest_helper.hpp"
+
+static void compiler_uint3_copy(void)
+{
+ const size_t n = 128;
+
+ // Setup kernel and buffers. Note that uint3 is aligned on 16 bytes
+ // according to the OCL specification
+ OCL_CREATE_KERNEL("compiler_uint3_copy");
+ buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t[4]) * n);
+ for (uint32_t i = 0; i < n; ++i) {
+ ((uint32_t*)buf_data[0])[4*i+0] = 3*i+0;
+ ((uint32_t*)buf_data[0])[4*i+1] = 3*i+1;
+ ((uint32_t*)buf_data[0])[4*i+2] = 3*i+2;
+ }
+
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t[4]), buf_data[0]);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t[4]), NULL);
+ free(buf_data[0]);
+ buf_data[0] = NULL;
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ // Check result
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < n; ++i) {
+ OCL_ASSERT(((uint32_t*)buf_data[0])[4*i+0] == ((uint32_t*)buf_data[1])[4*i+0]);
+ OCL_ASSERT(((uint32_t*)buf_data[0])[4*i+1] == ((uint32_t*)buf_data[1])[4*i+1]);
+ OCL_ASSERT(((uint32_t*)buf_data[0])[4*i+2] == ((uint32_t*)buf_data[1])[4*i+2]);
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_uint3_copy);
+
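[Editorial sketch] The sizeof(uint32_t[4]) per-element allocation follows from the OpenCL rule that 3-component vectors are sized and aligned like 4-component ones. A host-side sketch of that layout assumption (the struct name is illustrative only):

    #include <cstdint>

    struct host_uint3 {          // mirrors the 16-byte uint3 slot used above
      uint32_t x, y, z, pad;     // lane 3 is padding and is never checked
    };
    static_assert(sizeof(host_uint3) == sizeof(uint32_t[4]),
                  "each uint3 element occupies a 4-lane stride");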
diff --git a/utests/compiler_uint3_unaligned_copy.cpp b/utests/compiler_uint3_unaligned_copy.cpp
new file mode 100644
index 0000000..d42b4c3
--- /dev/null
+++ b/utests/compiler_uint3_unaligned_copy.cpp
@@ -0,0 +1,42 @@
+#include "utest_helper.hpp"
+
+static void compiler_uint3_unaligned_copy(void)
+{
+ const size_t n = 128;
+
+ // Setup kernel and buffers. uint3 is normally 16-byte aligned per the OCL
+ // specification, but here the host data is packed with a 3-uint stride.
+ OCL_CREATE_KERNEL("compiler_uint3_unaligned_copy");
+ buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t[4]) * n);
+ for (uint32_t i = 0; i < n; ++i) {
+ ((uint32_t*)buf_data[0])[3*i+0] = 3*i+0;
+ ((uint32_t*)buf_data[0])[3*i+1] = 3*i+1;
+ ((uint32_t*)buf_data[0])[3*i+2] = 3*i+2;
+ }
+
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t[4]), buf_data[0]);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t[4]), NULL);
+ free(buf_data[0]);
+ buf_data[0] = NULL;
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ // Check result
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < n; ++i) {
+ OCL_ASSERT(((uint32_t*)buf_data[0])[3*i+0] == ((uint32_t*)buf_data[1])[3*i+0]);
+ OCL_ASSERT(((uint32_t*)buf_data[0])[3*i+1] == ((uint32_t*)buf_data[1])[3*i+1]);
+ OCL_ASSERT(((uint32_t*)buf_data[0])[3*i+2] == ((uint32_t*)buf_data[1])[3*i+2]);
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_uint3_unaligned_copy);
+
+
+
diff --git a/utests/compiler_uint8_copy.cpp b/utests/compiler_uint8_copy.cpp
new file mode 100644
index 0000000..25dbd58
--- /dev/null
+++ b/utests/compiler_uint8_copy.cpp
@@ -0,0 +1,35 @@
+#include "utest_helper.hpp"
+
+static void compiler_uint8_copy(void)
+{
+ const size_t n = 128;
+
+ // Setup kernel and buffers. Note that a uint8 occupies 32 bytes and is
+ // aligned to its size according to the OCL specification
+ OCL_CREATE_KERNEL("compiler_uint8_copy");
+ buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t[8]) * n);
+ for (uint32_t i = 0; i < n; ++i)
+ for (uint32_t j = 0; j < 8; ++j)
+ ((uint32_t*)buf_data[0])[8*i+j] = 8*i+j;
+
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t[8]), buf_data[0]);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t[8]), NULL);
+ free(buf_data[0]);
+ buf_data[0] = NULL;
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ // Check result
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < 8*n; ++i)
+ OCL_ASSERT(((uint32_t*)buf_data[0])[i] == ((uint32_t*)buf_data[1])[i]);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_uint8_copy);
+
diff --git a/utests/compiler_unstructured_branch0.cpp b/utests/compiler_unstructured_branch0.cpp
new file mode 100644
index 0000000..128a53e
--- /dev/null
+++ b/utests/compiler_unstructured_branch0.cpp
@@ -0,0 +1,55 @@
+#include "utest_helper.hpp"
+
+static void compiler_unstructured_branch0(void)
+{
+ const size_t n = 32;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_unstructured_branch0");
+ buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n);
+ for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = 2;
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ free(buf_data[0]);
+ buf_data[0] = NULL;
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = 16;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ // First control flow
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < 16; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2);
+ for (uint32_t i = 16; i < 32; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == 1);
+
+ // Second control flow
+ for (uint32_t i = 0; i < n; ++i) ((int32_t*)buf_data[0])[i] = -2;
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < 32; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == 1);
+
+ // Third control flow
+ for (uint32_t i = 0; i < 8; ++i) ((int32_t*)buf_data[0])[i] = 2;
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < 8; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2);
+ for (uint32_t i = 8; i < 32; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == 1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_unstructured_branch0);
+
diff --git a/utests/compiler_unstructured_branch1.cpp b/utests/compiler_unstructured_branch1.cpp
new file mode 100644
index 0000000..6021f5b
--- /dev/null
+++ b/utests/compiler_unstructured_branch1.cpp
@@ -0,0 +1,54 @@
+#include "utest_helper.hpp"
+
+static void compiler_unstructured_branch1(void)
+{
+ const size_t n = 16;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_unstructured_branch1");
+ buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n);
+ for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = 2;
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ free(buf_data[0]);
+ buf_data[0] = NULL;
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = 16;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ // First control flow
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2);
+
+ // Second control flow
+ for (uint32_t i = 0; i < n; ++i) ((int32_t*)buf_data[0])[i] = -2;
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((uint32_t*)buf_data[1])[i] == 3);
+
+ // Third control flow
+ for (uint32_t i = 0; i < 8; ++i) ((int32_t*)buf_data[0])[i] = 2;
+ for (uint32_t i = 8; i < n; ++i) ((int32_t*)buf_data[0])[i] = -2;
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < 8; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2);
+ for (uint32_t i = 8; i < n; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == 3);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_unstructured_branch1);
+
diff --git a/utests/compiler_unstructured_branch2.cpp b/utests/compiler_unstructured_branch2.cpp
new file mode 100644
index 0000000..d61c6b5
--- /dev/null
+++ b/utests/compiler_unstructured_branch2.cpp
@@ -0,0 +1,68 @@
+#include "utest_helper.hpp"
+
+static void compiler_unstructured_branch2(void)
+{
+ const size_t n = 16;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_unstructured_branch2");
+ buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n);
+ for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = 2;
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ free(buf_data[0]);
+ buf_data[0] = NULL;
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = 16;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ // First control flow
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == 12);
+
+ // Second control flow
+ for (uint32_t i = 0; i < n; ++i) ((int32_t*)buf_data[0])[i] = -2;
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == -6);
+
+ // Third control flow
+ for (uint32_t i = 0; i < 8; ++i) ((int32_t*)buf_data[0])[i] = 2;
+ for (uint32_t i = 8; i < n; ++i) ((int32_t*)buf_data[0])[i] = -2;
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < 8; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == 12);
+ for (uint32_t i = 8; i < n; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == -6);
+
+ // Fourth control flow
+ for (uint32_t i = 0; i < 4; ++i) ((int32_t*)buf_data[0])[i] = 1;
+ for (uint32_t i = 4; i < 8; ++i) ((int32_t*)buf_data[0])[i] = 2;
+ for (uint32_t i = 8; i < n; ++i) ((int32_t*)buf_data[0])[i] = -2;
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < 8; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == 12);
+ for (uint32_t i = 8; i < n; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == -6);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_unstructured_branch2);
+
diff --git a/utests/compiler_unstructured_branch3.cpp b/utests/compiler_unstructured_branch3.cpp
new file mode 100644
index 0000000..0c6992a
--- /dev/null
+++ b/utests/compiler_unstructured_branch3.cpp
@@ -0,0 +1,58 @@
+#include "utest_helper.hpp"
+
+static void compiler_unstructured_branch3(void)
+{
+ const size_t n = 16;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_unstructured_branch3");
+ buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n);
+ for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = 2;
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ free(buf_data[0]);
+ buf_data[0] = NULL;
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = 16;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+
+ // First control flow
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+
+ for (uint32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2);
+
+ // Second control flow
+ for (uint32_t i = 0; i < n; ++i) ((int32_t*)buf_data[0])[i] = 0;
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((uint32_t*)buf_data[1])[i] == 3);
+
+ // Third control flow
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < 8; ++i) ((int32_t*)buf_data[0])[i] = 2;
+ for (uint32_t i = 8; i < n; ++i) ((int32_t*)buf_data[0])[i] = 0;
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < 8; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2);
+ for (uint32_t i = 8; i < n; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == 3);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_unstructured_branch3);
+
diff --git a/utests/compiler_write_only.cpp b/utests/compiler_write_only.cpp
new file mode 100644
index 0000000..3935535
--- /dev/null
+++ b/utests/compiler_write_only.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "utest_helper.hpp"
+
+static void compiler_write_only(void)
+{
+ const size_t n = 2048;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("test_write_only");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+
+ // Run the kernel
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+
+ // Check results
+ for (uint32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((uint32_t*)buf_data[0])[i] == i);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_write_only);
+
diff --git a/utests/compiler_write_only_bytes.cpp b/utests/compiler_write_only_bytes.cpp
new file mode 100644
index 0000000..1a13cdb
--- /dev/null
+++ b/utests/compiler_write_only_bytes.cpp
@@ -0,0 +1,23 @@
+#include "utest_helper.hpp"
+
+void compiler_write_only_bytes(void)
+{
+ const size_t n = 32;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_write_only_bytes");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint8_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+
+ // Run the kernel
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+
+ // Check results
+ for (uint32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((uint8_t*)buf_data[0])[i] == 2);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_write_only_bytes);
diff --git a/utests/compiler_write_only_shorts.cpp b/utests/compiler_write_only_shorts.cpp
new file mode 100644
index 0000000..19988fe
--- /dev/null
+++ b/utests/compiler_write_only_shorts.cpp
@@ -0,0 +1,24 @@
+#include "utest_helper.hpp"
+
+void compiler_write_only_shorts(void)
+{
+ const size_t n = 32;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_write_only_shorts");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+
+ // Run the kernel
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(0);
+
+ // Check results
+ for (uint32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((uint16_t*)buf_data[0])[i] == 2);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_write_only_shorts);
+
diff --git a/utests/runtime_flat_address_space.cpp b/utests/runtime_flat_address_space.cpp
new file mode 100644
index 0000000..0357cbd
--- /dev/null
+++ b/utests/runtime_flat_address_space.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "utest_helper.hpp"
+
+int
+main(int argc, char *argv[])
+{
+ cl_mem dst[24];
+ int *dst_buffer = NULL;
+ const size_t n = 32 * 1024 * 1024;
+ const size_t global_work_size = n;
+ const size_t local_work_size = 16;
+ int status = 0;
+
+ if ((status = cl_test_init("test_write_only.cl", "test_write_only", SOURCE)) != 0)
+ goto error;
+
+ for (uint32_t j = 0; j < 24; ++j)
+ {
+ // Allocate the two buffers
+ dst[j] = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status);
+ if (status != CL_SUCCESS) goto error;
+
+ // Set source and destination
+ OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &dst[j]);
+
+ // Run the kernel
+ OCL_CALL (clEnqueueNDRangeKernel, queue,
+ kernel,
+ 1,
+ NULL,
+ &global_work_size,
+ &local_work_size,
+ 0,
+ NULL,
+ NULL);
+
+ // Be sure that everything ran fine
+ dst_buffer = (int *) clIntelMapBuffer(dst[j], &status);
+ if (status != CL_SUCCESS)
+ goto error;
+ for (uint32_t i = 0; i < n; ++i)
+ if (dst_buffer[i] != int(i)) {
+ fprintf(stderr, "run-time flat address space failed\n");
+ exit(-1);
+ }
+ OCL_CALL (clIntelUnmapBuffer, dst[j]);
+ }
+
+ for (uint32_t j = 0; j < 24; ++j) OCL_CALL (clReleaseMemObject, dst[j]);
+ cl_test_destroy();
+ printf("%i memory leaks\n", clIntelReportUnfreed());
+ assert(clIntelReportUnfreed() == 0);
+
+error:
+ return status;
+}
+
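[Editorial sketch] clIntelMapBuffer, clIntelUnmapBuffer and clIntelReportUnfreed are extensions exposed by this runtime. For comparison only, the same readback can be written with the standard OpenCL map/unmap entry points; a sketch assuming the `queue` created by cl_test_init and one of the dst buffers (the function name is an assumption):

    #include <CL/cl.h>
    #include <cstdio>

    static cl_int check_buffer(cl_command_queue queue, cl_mem mem, size_t n)
    {
      cl_int status = CL_SUCCESS;
      // Blocking map for read access, standard API instead of clIntelMapBuffer.
      int *ptr = (int *) clEnqueueMapBuffer(queue, mem, CL_TRUE, CL_MAP_READ,
                                            0, n * sizeof(int),
                                            0, NULL, NULL, &status);
      if (status != CL_SUCCESS) return status;
      for (size_t i = 0; i < n; ++i)
        if (ptr[i] != (int) i)
          fprintf(stderr, "mismatch at element %zu\n", i);
      clEnqueueUnmapMemObject(queue, mem, ptr, 0, NULL, NULL);
      return clFinish(queue);
    }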
diff --git a/utests/utest.cpp b/utests/utest.cpp
new file mode 100644
index 0000000..fc3467e
--- /dev/null
+++ b/utests/utest.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file utest.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#include "utest.hpp"
+#include "utest_helper.hpp"
+#include <vector>
+#include <string>
+#include <iostream>
+#include <cstring>
+
+using namespace std;
+vector<UTest> *UTest::utestList = NULL;
+void releaseUTestList(void) { delete UTest::utestList; }
+
+UTest::UTest(Function fn, const char *name) : fn(fn), name(name) {
+ if (utestList == NULL) {
+ utestList = new vector<UTest>;
+ atexit(releaseUTestList);
+ }
+ utestList->push_back(*this);
+}
+
+UTest::UTest(void) : fn(NULL), name(NULL) {}
+
+static bool strequal(const char *s1, const char *s2) {
+ if (strcmp(s1, s2) == 0) return true;
+ return false;
+}
+
+void UTest::run(const char *name) {
+ if (name == NULL) return;
+ if (utestList == NULL) return;
+ for (size_t i = 0; i < utestList->size(); ++i) {
+ const UTest &utest = (*utestList)[i];
+ if (utest.name == NULL || utest.fn == NULL) continue;
+ if (strequal(utest.name, name)) {
+ std::cout << utest.name << ":" << std::endl;
+ (utest.fn)();
+ std::cout << std::endl;
+ cl_kernel_destroy();
+ cl_buffer_destroy();
+ }
+ }
+}
+
+void UTest::runAll(void) {
+ if (utestList == NULL) return;
+ for (size_t i = 0; i < utestList->size(); ++i) {
+ const UTest &utest = (*utestList)[i];
+ if (utest.fn == NULL) continue;
+ std::cout << utest.name << ":" << std::endl;
+ (utest.fn)();
+ std::cout << std::endl;
+ cl_kernel_destroy();
+ cl_buffer_destroy();
+ }
+}
+
diff --git a/utests/utest.hpp b/utests/utest.hpp
new file mode 100644
index 0000000..338a4dc
--- /dev/null
+++ b/utests/utest.hpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file utest.hpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ *
+ * Provides all unit test capabilities. It is rather rudimentary but it should
+ * do the job.
+ */
+#ifndef __UTEST_UTEST_HPP__
+#define __UTEST_UTEST_HPP__
+
+#include "utest_exception.hpp"
+#include <vector>
+#include <iostream>
+
+/*! Quick and dirty unit test system with registration */
+struct UTest
+{
+ /*! A unit test function to run */
+ typedef void (*Function) (void);
+ /*! Empty test */
+ UTest(void);
+ /*! Build a new unit test and append it to the unit test list */
+ UTest(Function fn, const char *name);
+ /*! Function to execute */
+ Function fn;
+ /*! Name of the test */
+ const char *name;
+ /*! The tests that are registered */
+ static std::vector<UTest> *utestList;
+ /*! Run the test with the given name */
+ static void run(const char *name);
+ /*! Run all the tests */
+ static void runAll(void);
+};
+
+/*! Register a new unit test */
+#define UTEST_REGISTER(FN) static const UTest __##FN##__(FN, #FN);
+
+/*! Turn a function into a unit test */
+#define MAKE_UTEST_FROM_FUNCTION(FN) \
+ static void __ANON__##FN##__(void) { UTEST_EXPECT_SUCCESS(FN()); } \
+ static const UTest __##FN##__(__ANON__##FN##__, #FN);
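+
+/*
+ * Illustrative usage sketch (the test function and kernel names below are
+ * hypothetical): a plain function becomes a registered unit test once wrapped
+ * by MAKE_UTEST_FROM_FUNCTION, which executes it under UTEST_EXPECT_SUCCESS
+ * and prints a [SUCCESS]/[FAILED] line for it.
+ *
+ *   static void compiler_copy_buffer(void) {
+ *     OCL_CREATE_KERNEL("test_copy_buffer");  // helper from utest_helper.hpp
+ *     OCL_ASSERT(kernel != NULL);             // helper from utest_assert.hpp
+ *   }
+ *   MAKE_UTEST_FROM_FUNCTION(compiler_copy_buffer);
+ */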
+
+/*! No assertion failure is expected */
+#define UTEST_EXPECT_SUCCESS(EXPR) \
+ do { \
+ try { \
+ EXPR; \
+ std::cout << " " << #EXPR << " [SUCCESS]" << std::endl; \
+ } \
+ catch (const Exception &e) { \
+ std::cout << " " << #EXPR << " [FAILED]" << std::endl; \
+ std::cout << " " << e.what() << std::endl; \
+ } \
+ } while (0)
+
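+/*! An exception (a failed OCL assertion) is expected to be thrown by EXPR */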
+#define UTEST_EXPECT_FAILED(EXPR) \
+ do { \
+ try { \
+ EXPR; \
+ std::cout << " " << #EXPR << " [FAILED]" << std::endl; \
+ } \
+ catch (const Exception &e) { \
+ std::cout << " " << #EXPR << " [SUCCESS]" << std::endl; \
+ } \
+ } while (0)
+
+#endif /* __UTEST_UTEST_HPP__ */
+
diff --git a/utests/utest_assert.cpp b/utests/utest_assert.cpp
new file mode 100644
index 0000000..fe3a99c
--- /dev/null
+++ b/utests/utest_assert.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file utest_assert.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#include "utest_assert.hpp"
+#include "utest_exception.hpp"
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+
+void onFailedAssertion(const char *msg, const char *file, const char *fn, int line)
+{
+ char lineString[256];
+ sprintf(lineString, "%i", line);
+ assert(msg != NULL && file != NULL && fn != NULL);
+ const std::string str = "Error: "
+ + std::string(msg) + "\n at file "
+ + std::string(file)
+ + ", function " + std::string(fn)
+ + ", line " + std::string(lineString);
+ assert(0);
+ throw Exception(str);
+}
+
diff --git a/utests/utest_assert.hpp b/utests/utest_assert.hpp
new file mode 100644
index 0000000..f93f9ac
--- /dev/null
+++ b/utests/utest_assert.hpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file utest_assert.hpp
+ *
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#ifndef __OCL_ASSERT_HPP__
+#define __OCL_ASSERT_HPP__
+
+/*! Check that a condition holds. An optional message is supported */
+void onFailedAssertion(const char *msg, const char *file, const char *fn, int line);
+
+#define OCL_ASSERT(EXPR) \
+ do { \
+ if (!(EXPR)) \
+ onFailedAssertion(#EXPR, __FILE__, __FUNCTION__, __LINE__); \
+ } while (0)
+
+#define OCL_ASSERTM(EXPR, MSG) \
+ do { \
+ if (!(EXPR)) \
+ onFailedAssertion(MSG, __FILE__, __FUNCTION__, __LINE__); \
+ } while (0)
+
+#endif /* __OCL_ASSERT_HPP__ */
+
diff --git a/utests/utest_error.c b/utests/utest_error.c
new file mode 100644
index 0000000..449147b
--- /dev/null
+++ b/utests/utest_error.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "utest_error.h"
+#include "CL/cl.h"
+
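+/*
+ * OpenCL status codes are zero or negative, so the table below is indexed by
+ * the negated code: err_msg[-status] yields the symbolic name printed by the
+ * test helpers (see OCL_THROW_ERROR in utest_helper.hpp).
+ */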
+const char *err_msg[] = {
+ [-CL_SUCCESS] = "CL_SUCCESS",
+ [-CL_DEVICE_NOT_FOUND] = "CL_DEVICE_NOT_FOUND",
+ [-CL_DEVICE_NOT_AVAILABLE] = "CL_DEVICE_NOT_AVAILABLE",
+ [-CL_COMPILER_NOT_AVAILABLE] = "CL_COMPILER_NOT_AVAILABLE",
+ [-CL_MEM_OBJECT_ALLOCATION_FAILURE] = "CL_MEM_OBJECT_ALLOCATION_FAILURE",
+ [-CL_OUT_OF_RESOURCES] = "CL_OUT_OF_RESOURCES",
+ [-CL_OUT_OF_HOST_MEMORY] = "CL_OUT_OF_HOST_MEMORY",
+ [-CL_PROFILING_INFO_NOT_AVAILABLE] = "CL_PROFILING_INFO_NOT_AVAILABLE",
+ [-CL_MEM_COPY_OVERLAP] = "CL_MEM_COPY_OVERLAP",
+ [-CL_IMAGE_FORMAT_MISMATCH] = "CL_IMAGE_FORMAT_MISMATCH",
+ [-CL_IMAGE_FORMAT_NOT_SUPPORTED] = "CL_IMAGE_FORMAT_NOT_SUPPORTED",
+ [-CL_BUILD_PROGRAM_FAILURE] = "CL_BUILD_PROGRAM_FAILURE",
+ [-CL_MAP_FAILURE] = "CL_MAP_FAILURE",
+ [-CL_MISALIGNED_SUB_BUFFER_OFFSET] = "CL_MISALIGNED_SUB_BUFFER_OFFSET",
+ [-CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST] = "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST",
+ [-CL_INVALID_VALUE] = "CL_INVALID_VALUE",
+ [-CL_INVALID_DEVICE_TYPE] = "CL_INVALID_DEVICE_TYPE",
+ [-CL_INVALID_PLATFORM] = "CL_INVALID_PLATFORM",
+ [-CL_INVALID_DEVICE] = "CL_INVALID_DEVICE",
+ [-CL_INVALID_CONTEXT] = "CL_INVALID_CONTEXT",
+ [-CL_INVALID_QUEUE_PROPERTIES] = "CL_INVALID_QUEUE_PROPERTIES",
+ [-CL_INVALID_COMMAND_QUEUE] = "CL_INVALID_COMMAND_QUEUE",
+ [-CL_INVALID_HOST_PTR] = "CL_INVALID_HOST_PTR",
+ [-CL_INVALID_MEM_OBJECT] = "CL_INVALID_MEM_OBJECT",
+ [-CL_INVALID_IMAGE_FORMAT_DESCRIPTOR] = "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR",
+ [-CL_INVALID_IMAGE_SIZE] = "CL_INVALID_IMAGE_SIZE",
+ [-CL_INVALID_SAMPLER] = "CL_INVALID_SAMPLER",
+ [-CL_INVALID_BINARY] = "CL_INVALID_BINARY",
+ [-CL_INVALID_BUILD_OPTIONS] = "CL_INVALID_BUILD_OPTIONS",
+ [-CL_INVALID_PROGRAM] = "CL_INVALID_PROGRAM",
+ [-CL_INVALID_PROGRAM_EXECUTABLE] = "CL_INVALID_PROGRAM_EXECUTABLE",
+ [-CL_INVALID_KERNEL_NAME] = "CL_INVALID_KERNEL_NAME",
+ [-CL_INVALID_KERNEL_DEFINITION] = "CL_INVALID_KERNEL_DEFINITION",
+ [-CL_INVALID_KERNEL] = "CL_INVALID_KERNEL",
+ [-CL_INVALID_ARG_INDEX] = "CL_INVALID_ARG_INDEX",
+ [-CL_INVALID_ARG_VALUE] = "CL_INVALID_ARG_VALUE",
+ [-CL_INVALID_ARG_SIZE] = "CL_INVALID_ARG_SIZE",
+ [-CL_INVALID_KERNEL_ARGS] = "CL_INVALID_KERNEL_ARGS",
+ [-CL_INVALID_WORK_DIMENSION] = "CL_INVALID_WORK_DIMENSION",
+ [-CL_INVALID_WORK_GROUP_SIZE] = "CL_INVALID_WORK_GROUP_SIZE",
+ [-CL_INVALID_WORK_ITEM_SIZE] = "CL_INVALID_WORK_ITEM_SIZE",
+ [-CL_INVALID_GLOBAL_OFFSET] = "CL_INVALID_GLOBAL_OFFSET",
+ [-CL_INVALID_EVENT_WAIT_LIST] = "CL_INVALID_EVENT_WAIT_LIST",
+ [-CL_INVALID_EVENT] = "CL_INVALID_EVENT",
+ [-CL_INVALID_OPERATION] = "CL_INVALID_OPERATION",
+ [-CL_INVALID_GL_OBJECT] = "CL_INVALID_GL_OBJECT",
+ [-CL_INVALID_BUFFER_SIZE] = "CL_INVALID_BUFFER_SIZE",
+ [-CL_INVALID_MIP_LEVEL] = "CL_INVALID_MIP_LEVEL",
+ [-CL_INVALID_GLOBAL_WORK_SIZE] = "CL_INVALID_GLOBAL_WORK_SIZE",
+ [-CL_INVALID_PROPERTY] = "CL_INVALID_PROPERTY"
+};
+const size_t err_msg_n = sizeof(err_msg) / sizeof(err_msg[0]);
+
diff --git a/utests/utest_error.h b/utests/utest_error.h
new file mode 100644
index 0000000..2da29b0
--- /dev/null
+++ b/utests/utest_error.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __UTEST_ERROR_H__
+#define __UTEST_ERROR_H__
+#include <stdlib.h>
+extern const char *err_msg[];
+extern const size_t err_msg_n;
+#endif /* __UTEST_ERROR_H__ */
+
diff --git a/utests/utest_exception.hpp b/utests/utest_exception.hpp
new file mode 100644
index 0000000..e19141f
--- /dev/null
+++ b/utests/utest_exception.hpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file exception.hpp
+ *
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#ifndef __UTEST_EXCEPTION_HPP__
+#define __UTEST_EXCEPTION_HPP__
+
+#include <string>
+#include <exception>
+
+/*! Exceptions are only used by the unit tests */
+class Exception : public std::exception
+{
+public:
+ Exception(const std::string &msg) throw() : msg(msg) {}
+ Exception(const Exception &other) throw() : msg(other.msg) {}
+ ~Exception(void) throw() {}
+ Exception &operator= (const Exception &other) throw() {
+ this->msg = other.msg;
+ return *this;
+ }
+ const char *what(void) const throw() { return msg.c_str(); }
+private:
+ std::string msg; //!< String message
+};
+
+#endif /* __UTEST_EXCEPTION_HPP__ */
+
diff --git a/utests/utest_file_map.cpp b/utests/utest_file_map.cpp
new file mode 100644
index 0000000..da3361c
--- /dev/null
+++ b/utests/utest_file_map.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "utest_file_map.hpp"
+#include "CL/cl.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+
+int
+cl_file_map_init(cl_file_map_t *fm)
+{
+ assert(fm);
+ memset(fm,0,sizeof(*fm));
+ return CL_SUCCESS;
+}
+
+void
+cl_file_map_destroy(cl_file_map_t *fm)
+{
+ if (fm->mapped) {
+ munmap(fm->start, fm->size);
+ fm->start = fm->stop = 0;
+ fm->size = 0;
+ fm->mapped = CL_FALSE;
+ }
+ if(fm->fd) {
+ close(fm->fd);
+ fm->fd = 0;
+ }
+ free(fm->name);
+ memset(fm,0,sizeof(*fm));
+}
+
+void
+cl_file_map_delete(cl_file_map_t *fm)
+{
+ if (fm == NULL)
+ return;
+ cl_file_map_destroy(fm);
+ free(fm);
+}
+
+cl_file_map_t*
+cl_file_map_new(void)
+{
+ cl_file_map_t *fm = NULL;
+
+ if ((fm = (cl_file_map_t *) calloc(1, sizeof(cl_file_map_t))) == NULL)
+ goto error;
+ if (cl_file_map_init(fm) != CL_SUCCESS)
+ goto error;
+
+exit:
+ return fm;
+error:
+ cl_file_map_delete(fm);
+ fm = NULL;
+ goto exit;
+}
+
+int
+cl_file_map_open(cl_file_map_t *fm, const char *name)
+{
+ int err = CL_FILE_MAP_SUCCESS;
+
+ /* Open the file */
+ fm->fd = open(name, O_RDONLY);
+ if (fm->fd < 0) {
+ err = CL_FILE_MAP_FILE_NOT_FOUND;
+ goto error;
+ }
+ if ((fm->name = (char*) calloc(strlen(name) + 1, sizeof(char))) == NULL)
+ goto error;
+ sprintf(fm->name, "%s", name);
+
+ /* Map it */
+ fm->size = lseek(fm->fd, 0, SEEK_END);
+ lseek(fm->fd, 0, SEEK_SET);
+ fm->start = mmap(0, fm->size, PROT_READ, MAP_SHARED, fm->fd, 0);
+ if (fm->start == MAP_FAILED) {
+ err = CL_FILE_MAP_FAILED_TO_MMAP;
+ goto error;
+ }
+
+ fm->stop = ((char *) fm->start) + fm->size;
+ fm->mapped = CL_TRUE;
+
+exit:
+ return err;
+error:
+ cl_file_map_destroy(fm);
+ goto exit;
+}
+
diff --git a/utests/utest_file_map.hpp b/utests/utest_file_map.hpp
new file mode 100644
index 0000000..83d79ea
--- /dev/null
+++ b/utests/utest_file_map.hpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file utest_file_map.hpp
+ *
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#ifndef __UTEST_FILE_MAP_HPP__
+#define __UTEST_FILE_MAP_HPP__
+
+#include "CL/cl.h"
+#include <cstdlib>
+
+/* Map a file into memory for direct / cached / simple accesses */
+typedef struct cl_file_map {
+ void *start, *stop; /* First character and last one */
+ size_t size; /* Total size of the file */
+ int fd; /* Posix file descriptor */
+ cl_bool mapped; /* Indicate if a file was mapped or not */
+ char *name; /* Name of the mapped file */
+} cl_file_map_t;
+
+/* Status codes reported by an open attempt */
+enum {
+ CL_FILE_MAP_SUCCESS = 0,
+ CL_FILE_MAP_FILE_NOT_FOUND = 1,
+ CL_FILE_MAP_FAILED_TO_MMAP = 2
+};
+
+/* Allocate and initialize a file mapper (but do not map any file) */
+extern cl_file_map_t *cl_file_map_new(void);
+
+/* Initialize a file mapper (but do not map any file) */
+extern int cl_file_map_init(cl_file_map_t *fm);
+
+/* Destroy but do not deallocate a file map */
+extern void cl_file_map_destroy(cl_file_map_t *fm);
+
+/* Destroy and free it */
+extern void cl_file_map_delete(cl_file_map_t *fm);
+
+/* Open a file and return the error code */
+extern int cl_file_map_open(cl_file_map_t *fm, const char *name);
+
+static inline cl_bool
+cl_file_map_is_mapped(const cl_file_map_t *fm) {
+ return fm->mapped;
+}
+
+static inline const char*
+cl_file_map_begin(const cl_file_map_t *fm) {
+ return (const char*) fm->start;
+}
+
+static inline const char*
+cl_file_map_end(const cl_file_map_t *fm) {
+ return (const char*) fm->stop;
+}
+
+static inline size_t
+cl_file_map_size(const cl_file_map_t *fm) {
+ return fm->size;
+}
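+
+/*
+ * Typical usage sketch (the file name below is hypothetical):
+ *
+ *   cl_file_map_t *fm = cl_file_map_new();
+ *   if (cl_file_map_open(fm, "kernel.cl") == CL_FILE_MAP_SUCCESS) {
+ *     const char *src = cl_file_map_begin(fm);
+ *     size_t sz = cl_file_map_size(fm);
+ *     // ... consume the sz bytes starting at src ...
+ *   }
+ *   cl_file_map_delete(fm);
+ */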
+
+#endif /* __UTEST_FILE_MAP_HPP__ */
+
diff --git a/utests/utest_helper.cpp b/utests/utest_helper.cpp
new file mode 100644
index 0000000..e586561
--- /dev/null
+++ b/utests/utest_helper.cpp
@@ -0,0 +1,482 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+#include "utest_file_map.hpp"
+#include "utest_helper.hpp"
+#include "utest_error.h"
+#include "CL/cl.h"
+#include "CL/cl_intel.h"
+
+#include <cstdio>
+#include <cstdint>
+#include <cstring>
+#include <cassert>
+#include <cmath>
+#include <algorithm>
+
+#define FATAL(...) \
+do { \
+ fprintf(stderr, "error: "); \
+ fprintf(stderr, __VA_ARGS__); \
+ fprintf(stderr, "\n");\
+ assert(0); \
+ exit(-1); \
+} while (0)
+
+#define FATAL_IF(COND, ...) \
+do { \
+ if (COND) FATAL(__VA_ARGS__); \
+} while (0)
+
+cl_platform_id platform = NULL;
+cl_device_id device = NULL;
+cl_context ctx = NULL;
+cl_program program = NULL;
+cl_kernel kernel = NULL;
+cl_command_queue queue = NULL;
+cl_mem buf[MAX_BUFFER_N] = {};
+void *buf_data[MAX_BUFFER_N] = {};
+size_t globals[3] = {};
+size_t locals[3] = {};
+
+static const char*
+cl_test_channel_order_string(cl_channel_order order)
+{
+ switch(order) {
+#define DECL_ORDER(WHICH) case CL_##WHICH: return "CL_"#WHICH
+ DECL_ORDER(R);
+ DECL_ORDER(A);
+ DECL_ORDER(RG);
+ DECL_ORDER(RA);
+ DECL_ORDER(RGB);
+ DECL_ORDER(RGBA);
+ DECL_ORDER(BGRA);
+ DECL_ORDER(ARGB);
+ DECL_ORDER(INTENSITY);
+ DECL_ORDER(LUMINANCE);
+ DECL_ORDER(Rx);
+ DECL_ORDER(RGx);
+ DECL_ORDER(RGBx);
+#undef DECL_ORDER
+ default: return "Unsupported image channel order";
+ };
+}
+
+static const char*
+cl_test_channel_type_string(cl_channel_type type)
+{
+ switch(type) {
+#define DECL_TYPE(WHICH) case CL_##WHICH: return "CL_"#WHICH
+ DECL_TYPE(SNORM_INT8);
+ DECL_TYPE(SNORM_INT16);
+ DECL_TYPE(UNORM_INT8);
+ DECL_TYPE(UNORM_INT16);
+ DECL_TYPE(UNORM_SHORT_565);
+ DECL_TYPE(UNORM_SHORT_555);
+ DECL_TYPE(UNORM_INT_101010);
+ DECL_TYPE(SIGNED_INT8);
+ DECL_TYPE(SIGNED_INT16);
+ DECL_TYPE(SIGNED_INT32);
+ DECL_TYPE(UNSIGNED_INT8);
+ DECL_TYPE(UNSIGNED_INT16);
+ DECL_TYPE(UNSIGNED_INT32);
+ DECL_TYPE(HALF_FLOAT);
+ DECL_TYPE(FLOAT);
+#undef DECL_TYPE
+ default: return "Unsupported image channel type";
+ };
+}
+
+static void
+clpanic(const char *msg, int rval)
+{
+ printf("Failed: %s (%d)\n", msg, rval);
+ exit(-1);
+}
+
+static char*
+do_kiss_path(const char *file, cl_device_id device)
+{
+ cl_int ver;
+ const char *sub_path = NULL;
+ char *ker_path = NULL;
+ const char *kiss_path = getenv("OCL_KERNEL_PATH");
+ size_t sz = strlen(file);
+
+ if (device == NULL)
+ sub_path = "";
+ else {
+ if (clIntelGetGenVersion(device, &ver) != CL_SUCCESS)
+ clpanic("Unable to get Gen version", -1);
+ sub_path = "";
+ }
+
+ if (kiss_path == NULL)
+ clpanic("set OCL_KERNEL_PATH. This is where the kiss kernels are", -1);
+ sz += strlen(kiss_path) + strlen(sub_path) + 2; /* +1 for end of string, +1 for '/' */
+ if ((ker_path = (char*) malloc(sz)) == NULL)
+ clpanic("Allocation failed", -1);
+ sprintf(ker_path, "%s/%s%s", kiss_path, sub_path, file);
+ return ker_path;
+}
+
+int
+cl_kernel_init(const char *file_name, const char *kernel_name, int format)
+{
+ cl_file_map_t *fm = NULL;
+ char *ker_path = NULL;
+ cl_int status = CL_SUCCESS;
+
+ /* Load the program and build it */
+ ker_path = do_kiss_path(file_name, device);
+ if (format == LLVM)
+ program = clCreateProgramWithLLVM(ctx, 1, &device, ker_path, &status);
+ else if (format == SOURCE) {
+ fm = cl_file_map_new();
+ FATAL_IF (cl_file_map_open(fm, ker_path) != CL_FILE_MAP_SUCCESS,
+ "Failed to open file. Did you properly set OCL_KERNEL_PATH variable?");
+ const char *src = cl_file_map_begin(fm);
+ const size_t sz = cl_file_map_size(fm);
+ program = clCreateProgramWithSource(ctx, 1, &src, &sz, &status);
+ } else
+ FATAL("Not able to create program from binary");
+
+ if (status != CL_SUCCESS) {
+ fprintf(stderr, "error calling clCreateProgramWithBinary\n");
+ goto error;
+ }
+
+ /* OpenCL requires the program to be built even when it is created from a binary */
+ OCL_CALL (clBuildProgram, program, 1, &device, NULL, NULL, NULL);
+
+ /* Create a kernel from the program */
+ kernel = clCreateKernel(program, kernel_name, &status);
+ if (status != CL_SUCCESS) {
+ fprintf(stderr, "error calling clCreateKernel\n");
+ goto error;
+ }
+
+exit:
+ free(ker_path);
+ cl_file_map_delete(fm);
+ return status;
+error:
+ goto exit;
+}
+
+int
+cl_ocl_init(void)
+{
+ cl_int status = CL_SUCCESS;
+ char name[128];
+ cl_uint platform_n;
+ size_t i;
+
+ /* Get the platform number */
+ OCL_CALL (clGetPlatformIDs, 0, NULL, &platform_n);
+ printf("platform number %u\n", platform_n);
+ assert(platform_n >= 1);
+
+ /* Get a valid platform */
+ OCL_CALL (clGetPlatformIDs, 1, &platform, &platform_n);
+ OCL_CALL (clGetPlatformInfo, platform, CL_PLATFORM_PROFILE, sizeof(name), name, NULL);
+ printf("platform_profile \"%s\"\n", name);
+ OCL_CALL (clGetPlatformInfo, platform, CL_PLATFORM_NAME, sizeof(name), name, NULL);
+ printf("platform_name \"%s\"\n", name);
+ OCL_CALL (clGetPlatformInfo, platform, CL_PLATFORM_VENDOR, sizeof(name), name, NULL);
+ printf("platform_vendor \"%s\"\n", name);
+ OCL_CALL (clGetPlatformInfo, platform, CL_PLATFORM_VERSION, sizeof(name), name, NULL);
+ printf("platform_version \"%s\"\n", name);
+
+ /* Get the device (only GPU device is supported right now) */
+ OCL_CALL (clGetDeviceIDs, platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
+ OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_PROFILE, sizeof(name), name, NULL);
+ printf("device_profile \"%s\"\n", name);
+ OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_NAME, sizeof(name), name, NULL);
+ printf("device_name \"%s\"\n", name);
+ OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_VENDOR, sizeof(name), name, NULL);
+ printf("device_vendor \"%s\"\n", name);
+ OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_VERSION, sizeof(name), name, NULL);
+ printf("device_version \"%s\"\n", name);
+
+ /* Now create a context */
+ ctx = clCreateContext(0, 1, &device, NULL, NULL, &status);
+ if (status != CL_SUCCESS) {
+ fprintf(stderr, "error calling clCreateContext\n");
+ goto error;
+ }
+
+ /* All image types currently supported by the context */
+ cl_image_format fmt[256];
+ cl_uint fmt_n;
+ clGetSupportedImageFormats(ctx, 0, CL_MEM_OBJECT_IMAGE2D, 256, fmt, &fmt_n);
+ printf("%u image formats are supported\n", fmt_n);
+ for (i = 0; i < fmt_n; ++i)
+ printf("[%s %s]\n",
+ cl_test_channel_order_string(fmt[i].image_channel_order),
+ cl_test_channel_type_string(fmt[i].image_channel_data_type));
+
+ /* We are going to push NDRange kernels here */
+ queue = clCreateCommandQueue(ctx, device, 0, &status);
+ if (status != CL_SUCCESS) {
+ fprintf(stderr, "error calling clCreateCommandQueue\n");
+ goto error;
+ }
+
+error:
+ return status;
+}
+
+int
+cl_test_init(const char *file_name, const char *kernel_name, int format)
+{
+ cl_int status = CL_SUCCESS;
+
+ /* Initialize OCL */
+ if ((status = cl_ocl_init()) != CL_SUCCESS)
+ goto error;
+
+ /* Load the kernel */
+ if ((status = cl_kernel_init(file_name, kernel_name, format)) != CL_SUCCESS)
+ goto error;
+
+error:
+ return status;
+}
+
+void
+cl_kernel_destroy(void)
+{
+ if (kernel) clReleaseKernel(kernel);
+ if (program) clReleaseProgram(program);
+ kernel = NULL;
+ program = NULL;
+}
+
+void
+cl_ocl_destroy(void)
+{
+ clReleaseCommandQueue(queue);
+ clReleaseContext(ctx);
+}
+
+void
+cl_test_destroy(void)
+{
+ cl_kernel_destroy();
+ cl_ocl_destroy();
+ printf("%i memory leaks\n", clIntelReportUnfreed());
+ assert(clIntelReportUnfreed() == 0);
+}
+
+void
+cl_buffer_destroy(void)
+{
+ int i;
+ for (i = 0; i < MAX_BUFFER_N; ++i) {
+ if (buf_data[i] != NULL) {
+ clIntelUnmapBuffer(buf[i]);
+ buf_data[i] = NULL;
+ }
+ if (buf[i] != NULL) {
+ clReleaseMemObject(buf[i]);
+ buf[i] = NULL;
+ }
+ }
+}
+
+void
+cl_report_perf_counters(cl_mem perf)
+{
+ cl_int status = CL_SUCCESS;
+ uint32_t *start = NULL, *end = NULL;
+ uint32_t i;
+ if (perf == NULL)
+ return;
+ start = (uint32_t*) clIntelMapBuffer(perf, &status);
+ assert(status == CL_SUCCESS && start != NULL);
+ end = start + 128;
+
+ printf("BEFORE\n");
+ for (i = 0; i < 6*8; ++i) {
+ if (i % 8 == 0) printf("\n");
+ printf("[%3u 0x%8x] ", i, start[i]);
+ }
+ printf("\n\n");
+
+ printf("AFTER\n");
+ for (i = 0; i < 6*8; ++i) {
+ if (i % 8 == 0) printf("\n");
+ printf("[%3u 0x%8x] ", i, end[i]);
+ }
+ printf("\n\n");
+
+ printf("DIFF\n");
+ for (i = 0; i < 6*8; ++i) {
+ if (i % 8 == 0) printf("\n");
+ printf("[%3u %8i] ", i, end[i] - start[i]);
+ }
+ printf("\n\n");
+
+ clIntelUnmapBuffer(perf);
+}
+
+struct bmphdr {
+ // 2 bytes of magic here, "BM", total header size is 54 bytes!
+ int filesize; // 4 total file size incl header
+ short as0, as1; // 8 app specific
+ int bmpoffset; // 12 offset of bmp data
+ int headerbytes; // 16 bytes in header from this point (40 actually)
+ int width; // 20
+ int height; // 24
+ short nplanes; // 26 no of color planes
+ short bpp; // 28 bits/pixel
+ int compression; // 32 BI_RGB = 0 = no compression
+ int sizeraw; // 36 size of raw bmp file, excluding header, incl padding
+ int hres; // 40 horizontal resolution, pixels/meter
+ int vres; // 44 vertical resolution, pixels/meter
+ int npalcolors; // 48 No of colors in palette
+ int nimportant; // 52 No of important colors
+ // raw b, g, r data here, dword aligned per scan line
+};
+
+int *cl_read_bmp(const char *filename, int *width, int *height)
+{
+ struct bmphdr hdr;
+ char *bmppath = do_kiss_path(filename, device);
+ FILE *fp = fopen(bmppath, "rb");
+ assert(fp);
+
+ char magic[2];
+ fread(&magic[0], 1, 2, fp);
+ assert(magic[0] == 'B' && magic[1] == 'M');
+
+ fread(&hdr, 1, sizeof(hdr), fp);
+
+ assert(hdr.width > 0 && hdr.height > 0 && hdr.nplanes == 1 && hdr.compression == 0);
+
+ int *rgb32 = (int *) malloc(hdr.width * hdr.height * sizeof(int));
+ assert(rgb32);
+ int x, y;
+
+ int *dst = rgb32;
+ for (y = 0; y < hdr.height; y++) {
+ for (x = 0; x < hdr.width; x++) {
+ assert(!feof(fp));
+ int b = (getc(fp) & 0x0ff);
+ int g = (getc(fp) & 0x0ff);
+ int r = (getc(fp) & 0x0ff);
+ *dst++ = (r | (g << 8) | (b << 16) | 0xff000000); /* abgr */
+ }
+ // each scanline is padded to a dword (4-byte) boundary
+ int pad = (4 - (hdr.width * 3) % 4) % 4;
+ while (pad--)
+ getc(fp);
+ // printf("read row %d\n", y);
+ // fflush(stdout);
+ }
+ fclose(fp);
+ *width = hdr.width;
+ *height = hdr.height;
+ free(bmppath);
+ return rgb32;
+}
+
+void cl_write_bmp(const int *data, int width, int height, const char *filename)
+{
+ int x, y;
+
+ FILE *fp = fopen(filename, "wb");
+ assert(fp);
+
+ char *raw = (char *) malloc(width * height * sizeof(int)); // at most
+ assert(raw);
+ char *p = raw;
+
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++) {
+ int c = *data++;
+ *p++ = ((c >> 16) & 0xff);
+ *p++ = ((c >> 8) & 0xff);
+ *p++ = ((c >> 0) & 0xff);
+ }
+ // pad each scanline to a dword (4-byte) boundary
+ int pad = (4 - (width * 3) % 4) % 4;
+ while (pad--)
+ *p++ = 0;
+ }
+ int sizeraw = p - raw;
+ int scanline = (width * 3 + 3) & ~3;
+ assert(sizeraw == scanline * height);
+
+ struct bmphdr hdr;
+
+ hdr.filesize = scanline * height + sizeof(hdr) + 2;
+ hdr.as0 = 0;
+ hdr.as1 = 0;
+ hdr.bmpoffset = sizeof(hdr) + 2;
+ hdr.headerbytes = 40;
+ hdr.width = width;
+ hdr.height = height;
+ hdr.nplanes = 1;
+ hdr.bpp = 24;
+ hdr.compression = 0;
+ hdr.sizeraw = sizeraw;
+ hdr.hres = 0; // 2834;
+ hdr.vres = 0; // 2834;
+ hdr.npalcolors = 0;
+ hdr.nimportant = 0;
+
+ /* Now write bmp file */
+ char magic[2] = { 'B', 'M' };
+ fwrite(&magic[0], 1, 2, fp);
+ fwrite(&hdr, 1, sizeof(hdr), fp);
+ fwrite(raw, 1, hdr.sizeraw, fp);
+
+ fclose(fp);
+ free(raw);
+}
+
+static const float pixel_threshold = 0.05f;
+static const float max_error_ratio = 0.001f;
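+
+/*
+ * cl_check_image compares each pixel of img against the reference bitmap
+ * using a per-channel relative difference; a pixel whose RGB error norm
+ * exceeds pixel_threshold counts as a discrepancy, and the image is accepted
+ * as long as at most max_error_ratio (0.1%) of the pixels are discrepant.
+ */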
+
+int cl_check_image(const int *img, int w, int h, const char *bmp)
+{
+ int refw, refh;
+ int *ref = cl_read_bmp(bmp, &refw, &refh);
+ if (ref == NULL || refw != w || refh != h) return 0;
+ const int n = w*h;
+ int discrepancy = 0;
+ for (int i = 0; i < n; ++i) {
+ const float r = (float) (img[i] & 0xff);
+ const float g = (float) ((img[i] >> 8) & 0xff);
+ const float b = (float) ((img[i] >> 16) & 0xff);
+ const float rr = (float) (ref[i] & 0xff);
+ const float rg = (float) ((ref[i] >> 8) & 0xff);
+ const float rb = (float) ((ref[i] >> 16) & 0xff);
+ const float dr = fabs(r-rr) / (1.f/255.f + std::max(r,rr));
+ const float dg = fabs(g-rg) / (1.f/255.f + std::max(g,rg));
+ const float db = fabs(b-rb) / (1.f/255.f + std::max(b,rb));
+ const float err = sqrtf(dr*dr+dg*dg+db*db);
+ if (err > pixel_threshold) discrepancy++;
+ }
+ free(ref);
+ return (float(discrepancy) / float(n) > max_error_ratio) ? 0 : 1;
+}
+
diff --git a/utests/utest_helper.hpp b/utests/utest_helper.hpp
new file mode 100644
index 0000000..5018433
--- /dev/null
+++ b/utests/utest_helper.hpp
@@ -0,0 +1,143 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file utest_helper.hpp
+ *
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#ifndef __UTEST_HELPER_HPP__
+#define __UTEST_HELPER_HPP__
+
+#include "CL/cl.h"
+#include "CL/cl_intel.h"
+#include "utest.hpp"
+#include "utest_assert.hpp"
+#include "utest_error.h"
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+
+#define OCL_THROW_ERROR(FN, STATUS) \
+ do { \
+ char msg[2048]; \
+ sprintf(msg, "error calling %s with error%s \n", #FN, err_msg[-STATUS]); \
+ OCL_ASSERTM(false, msg); \
+ } while (0)
+
+#define OCL_CALL(FN, ...) \
+ do { \
+ int status = FN(__VA_ARGS__); \
+ if (status != CL_SUCCESS) OCL_THROW_ERROR(FN, status); \
+ } while (0)
+
+#define OCL_CREATE_KERNEL(NAME) \
+ do { \
+ OCL_CALL (cl_kernel_init, NAME".cl", NAME, SOURCE); \
+ } while (0)
+
+#define OCL_CREATE_BUFFER(BUFFER, FLAGS, SIZE, DATA) \
+ do { \
+ cl_int status; \
+ BUFFER = clCreateBuffer(ctx, FLAGS, SIZE, DATA, &status); \
+ if (status != CL_SUCCESS) OCL_THROW_ERROR(clCreateBuffer, status); \
+ } while (0)
+
+#define OCL_MAP_BUFFER(ID) \
+ do { \
+ cl_int status; \
+ buf_data[ID] = (int *) clIntelMapBuffer(buf[ID], &status); \
+ if (status != CL_SUCCESS) OCL_THROW_ERROR(clIntelMapBuffer, status); \
+ } while (0)
+
+#define OCL_UNMAP_BUFFER(ID) \
+ do { \
+ if (buf[ID] != NULL) { \
+ OCL_CALL (clIntelUnmapBuffer, buf[ID]); \
+ buf_data[ID] = NULL; \
+ } \
+ } while (0)
+
+#define OCL_NDRANGE(DIM_N) \
+ do { \
+ OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, DIM_N, NULL, globals, locals, 0, NULL, NULL); \
+ } while (0)
+
+#define OCL_SET_ARG(ID, SIZE, ARG) \
+ do { \
+ OCL_CALL (clSetKernelArg, kernel, ID, SIZE, ARG); \
+ } while (0)
+
+#define OCL_CHECK_IMAGE(DATA, W, H, FILENAME) \
+ if (cl_check_image(DATA, W, H, FILENAME) == 0) \
+ OCL_ASSERTM(false, "image mismatch")
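+
+/*
+ * Illustrative test body built from these helpers (the kernel name and the
+ * sizes are hypothetical):
+ *
+ *   OCL_CREATE_KERNEL("test_fill_buffer");
+ *   OCL_CREATE_BUFFER(buf[0], 0, 1024 * sizeof(int), NULL);
+ *   OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ *   globals[0] = 1024;
+ *   locals[0] = 16;
+ *   OCL_NDRANGE(1);
+ *   OCL_MAP_BUFFER(0);
+ *   // ... inspect buf_data[0] ...
+ *   OCL_UNMAP_BUFFER(0);
+ */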
+
+enum { MAX_BUFFER_N = 16 };
+extern cl_platform_id platform;
+extern cl_device_id device;
+extern cl_context ctx;
+extern cl_program program;
+extern cl_kernel kernel;
+extern cl_command_queue queue;
+extern cl_mem buf[MAX_BUFFER_N];
+extern void* buf_data[MAX_BUFFER_N];
+extern size_t globals[3];
+extern size_t locals[3];
+
+enum {
+ SOURCE = 0,
+ LLVM = 1,
+ BIN = 2
+};
+
+/* Init OpenCL */
+extern int cl_ocl_init(void);
+
+/* Init program and kernel for the test */
+extern int cl_kernel_init(const char *file_name, const char *kernel_name, int format);
+
+/* Initialize OpenCL and the kernel; fills in the global variables above */
+extern int cl_test_init(const char *file_name, const char *kernel_name, int format);
+
+/* Unmap and release all the created buffers */
+extern void cl_buffer_destroy(void);
+
+/* Release OCL queue, context and device */
+extern void cl_ocl_destroy(void);
+
+/* Release kernel and program */
+extern void cl_kernel_destroy(void);
+
+/* Release everything allocated in cl_test_init */
+extern void cl_test_destroy(void);
+
+/* Nicely output the performance counters */
+extern void cl_report_perf_counters(cl_mem perf);
+
+/* Read a bmp from file */
+extern int *cl_read_bmp(const char *filename, int *width, int *height);
+
+/* Write a bmp to a file */
+extern void cl_write_bmp(const int *data, int width, int height, const char *filename);
+
+/* Check data from img against bmp file located at "bmp" */
+extern int cl_check_image(const int *img, int w, int h, const char *bmp);
+
+#endif /* __UTEST_HELPER_HPP__ */
+
diff --git a/utests/utest_run.cpp b/utests/utest_run.cpp
new file mode 100644
index 0000000..e577b7b
--- /dev/null
+++ b/utests/utest_run.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**
+ * \file utest_run.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ *
+ * Just run the unit tests. The user may optionally provide the subset of tests to run
+ */
+#include "utest_helper.hpp"
+#include "utest_exception.hpp"
+#include <iostream>
+
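+/*
+ * Typical invocation (the binary name and kernel path are hypothetical):
+ *
+ *   OCL_KERNEL_PATH=/path/to/kernels ./utest_run              # run all tests
+ *   OCL_KERNEL_PATH=/path/to/kernels ./utest_run some_test    # run one test
+ */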
+int main(int argc, char *argv[])
+{
+ try {
+ cl_ocl_init();
+ if (argc >= 2)
+ for (int i = 1; i < argc; ++i)
+ UTest::run(argv[i]);
+ else
+ UTest::runAll();
+ cl_ocl_destroy();
+ } catch (const Exception &e) {
+ std::cout << " " << e.what() << " [FAILED]" << std::endl;
+ }
+}
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/beignet.git