[Pkg-opencl-devel] [beignet] 23/66: Imported Upstream version 0.1+git20130514+19e9c58

Andreas Beckmann anbe at moszumanska.debian.org
Fri Oct 31 07:27:04 UTC 2014


This is an automated email from the git hooks/post-receive script.

anbe pushed a commit to branch master
in repository beignet.

commit f4bef4e7865b371a0b22d376800cf80f1485b641
Author: Simon Richter <sjr at debian.org>
Date:   Tue May 14 20:04:29 2013 +0200

    Imported Upstream version 0.1+git20130514+19e9c58
---
 CMakeLists.txt                             |  4 +-
 backend/src/backend/gen_context.cpp        | 14 ++++--
 backend/src/backend/gen_insn_selection.cpp | 37 ++++++++--------
 backend/src/ir/instruction.cpp             | 12 ++---
 backend/src/llvm/llvm_gen_backend.cpp      | 70 ++++++++++++++++++++++++++++--
 backend/src/llvm/llvm_gen_ocl_function.hxx | 14 ++++++
 backend/src/llvm/llvm_passes.cpp           |  4 +-
 backend/src/ocl_stdlib.h                   | 46 +++++++++++++++++++-
 kernels/test_copy_image_3d.cl              | 11 +++++
 kernels/test_fill_image_3d.cl              | 14 ++++++
 kernels/test_fill_image_3d_2.cl            | 10 +++++
 src/cl_api.c                               | 56 ++++++++++++++++++------
 src/cl_mem.c                               | 32 +++++++++++---
 utests/CMakeLists.txt                      |  3 ++
 utests/compiler_copy_image_3d.cpp          | 55 +++++++++++++++++++++++
 utests/compiler_fill_image_3d.cpp          | 44 +++++++++++++++++++
 utests/compiler_fill_image_3d_2.cpp        | 42 ++++++++++++++++++
 17 files changed, 416 insertions(+), 52 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4b402ee..6d0d291 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -55,8 +55,8 @@ ELSE (USE_FULSIM)
   ADD_DEFINITIONS(-DUSE_FULSIM=0)
 ENDIF (USE_FULSIM)
 
-SET(CMAKE_CXX_FLAGS "-Wall -Wno-invalid-offsetof -mfpmath=sse -fno-rtti -Wcast-align -std=c++0x -msse2 -msse3 -mssse3 -msse4.1 ")
-SET(CMAKE_C_FLAGS "-Wall -mfpmath=sse -msse2 -Wcast-align -msse2 -msse3 -mssse3 -msse4.1")
+SET(CMAKE_CXX_FLAGS "-Wall -Wno-invalid-offsetof -mfpmath=sse -fno-rtti -Wcast-align -std=c++0x -msse2 -msse3 -mssse3 -msse4.1 ${CMAKE_CXX_FLAGS}")
+SET(CMAKE_C_FLAGS "-Wall -mfpmath=sse -msse2 -Wcast-align -msse2 -msse3 -mssse3 -msse4.1 ${CMAKE_C_FLAGS}")
 
 # Front end stuff we need
 #INCLUDE(CMake/FindLLVM.cmake)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 1f867b8..4a16cae 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -291,6 +291,7 @@ namespace gbe
     const GenRegister sampler = ra->genReg(insn.src(5));
     const GenRegister ucoord = ra->genReg(insn.src(6));
     const GenRegister vcoord = ra->genReg(insn.src(7));
+    const GenRegister wcoord = ra->genReg(insn.src(8));
     const GenRegister temp = GenRegister::ud1grf(msgPayload.nr, msgPayload.subnr/sizeof(float) + 4);
     const GenRegister a0_0 = GenRegister::ud1arf(GEN_ARF_ADDRESS, 0);
     uint32_t simdWidth = p->curr.execWidth;
@@ -309,6 +310,8 @@ namespace gbe
     /* Prepare message payload. */
     p->MOV(GenRegister::f8grf(nr , 0), ucoord);
     p->MOV(GenRegister::f8grf(nr + (simdWidth/8), 0), vcoord);
+    if (insn.src(8).reg() != 0)
+      p->MOV(GenRegister::f8grf(nr + (simdWidth/4), 0), wcoord);
     p->SAMPLE(dst, msgPayload, a0_0, -1, 0);
 
     p->pop();
@@ -319,10 +322,11 @@ namespace gbe
     const GenRegister bti = ra->genReg(insn.src(0 + insn.extra.elem));
     const GenRegister ucoord = ra->genReg(insn.src(1 + insn.extra.elem));
     const GenRegister vcoord = ra->genReg(insn.src(2 + insn.extra.elem));
-    const GenRegister R = ra->genReg(insn.src(3 + insn.extra.elem));
-    const GenRegister G = ra->genReg(insn.src(4 + insn.extra.elem));
-    const GenRegister B = ra->genReg(insn.src(5 + insn.extra.elem));
-    const GenRegister A = ra->genReg(insn.src(6 + insn.extra.elem));
+    const GenRegister wcoord = ra->genReg(insn.src(3 + insn.extra.elem));
+    const GenRegister R = ra->genReg(insn.src(4 + insn.extra.elem));
+    const GenRegister G = ra->genReg(insn.src(5 + insn.extra.elem));
+    const GenRegister B = ra->genReg(insn.src(6 + insn.extra.elem));
+    const GenRegister A = ra->genReg(insn.src(7 + insn.extra.elem));
     const GenRegister a0_0 = GenRegister::ud1arf(GEN_ARF_ADDRESS, 0);
 
     p->push();
@@ -357,6 +361,8 @@ namespace gbe
                                         GenRegister::retype(GenRegister::QnPhysical(src,quarter), src.type))
       QUARTER_MOV0(nr + 1, ucoord);
       QUARTER_MOV0(nr + 2, vcoord);
+      if (insn.src(3 + insn.extra.elem).reg() != 0)
+        QUARTER_MOV0(nr + 3, wcoord);
       QUARTER_MOV1(nr + 5, R);
       QUARTER_MOV1(nr + 6, G);
       QUARTER_MOV1(nr + 7, B);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index ee8a0ab..2bd9aca 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -466,7 +466,7 @@ namespace gbe
     /*! Encode ternary instructions */
     void ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2);
     /*! Encode sample instructions */
-    void SAMPLE(GenRegister *dst, GenRegister *src, GenRegister *msgPayloads);
+    void SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *src, uint32_t srcNum, GenRegister *msgPayloads, uint32_t msgNum);
     /*! Encode typed write instructions */
     void TYPED_WRITE(GenRegister *src, uint32_t srcNum, GenRegister *msgs, uint32_t msgNum);
     /*! Use custom allocators */
@@ -965,26 +965,26 @@ namespace gbe
     });
    }
  /* XXX always 4 return values? */
-  void Selection::Opaque::SAMPLE(GenRegister *dst, GenRegister *src, GenRegister *msgPayloads) {
-    uint32_t elemNum = 4;
-    SelectionInstruction *insn = this->appendInsn(SEL_OP_SAMPLE, elemNum, 8);
+  void Selection::Opaque::SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *src, uint32_t srcNum, GenRegister *msgPayloads, uint32_t msgNum) {
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_SAMPLE, dstNum, msgNum + srcNum);
     SelectionVector *dstVector = this->appendVector();
     SelectionVector *msgVector = this->appendVector();
 
     // Regular instruction to encode
-    for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
-    {
+    for (uint32_t elemID = 0; elemID < dstNum; ++elemID)
       insn->dst(elemID) = dst[elemID];
+    for (uint32_t elemID = 0; elemID < msgNum; ++elemID)
       insn->src(elemID) = msgPayloads[elemID];
-      insn->src(4 + elemID) = src[elemID];
-    }
+    for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+      insn->src(msgNum + elemID) = src[elemID];
 
     // Sends require contiguous allocation
-    dstVector->regNum = 4;
+    dstVector->regNum = dstNum;
     dstVector->isSrc = 0;
     dstVector->reg = &insn->dst(0);
 
-    msgVector->regNum = 4;
+    // Only the messages require contiguous registers.
+    msgVector->regNum = msgNum;
     msgVector->isSrc = 1;
     msgVector->reg = &insn->src(0);
   }
@@ -1957,18 +1957,18 @@ namespace gbe
     {
       using namespace ir;
       GenRegister msgPayloads[4];
-      GenRegister dst[4], src[4];
+      GenRegister dst[insn.getDstNum()], src[insn.getSrcNum()];
 
       for( int i = 0; i < 4; ++i)
         msgPayloads[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
 
-      for (uint32_t valueID = 0; valueID < 4; ++valueID)
-      {
+      for (uint32_t valueID = 0; valueID < insn.getDstNum(); ++valueID)
         dst[valueID] = sel.selReg(insn.getDst(valueID), insn.getDstType());
+
+      for (uint32_t valueID = 0; valueID < insn.getSrcNum(); ++valueID)
         src[valueID] = sel.selReg(insn.getSrc(valueID), insn.getSrcType());
-      }
 
-      sel.SAMPLE(dst, src, msgPayloads);
+      sel.SAMPLE(dst, insn.getDstNum(), src, insn.getSrcNum(), msgPayloads, 4);
       return true;
     }
     DECL_CTOR(SampleInstruction, 1, 1);
@@ -1983,8 +1983,9 @@ namespace gbe
       const uint32_t simdWidth = sel.ctx.getSimdWidth();
       uint32_t valueID = 0;
       GenRegister msgs[9]; // (header + U + V + R + LOD + 4)
-      GenRegister src[7];
+      GenRegister src[insn.getSrcNum()];
       uint32_t msgNum = (8 / (simdWidth / 8)) + 1;
+      uint32_t coordNum = (insn.getSrcNum() == 7) ? 2 : 3;
 
       for(uint32_t i = 0; i < msgNum; i++)
         msgs[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
@@ -1992,8 +1993,8 @@ namespace gbe
       // bti always uses TYPE_U32.
       src[valueID] = sel.selReg(insn.getSrc(valueID), TYPE_U32);
       valueID++;
-
-      for (; valueID < 3; ++valueID)
+      // u, v, w coords should use coord type.
+      for (; valueID < 1 + coordNum; ++valueID)
         src[valueID] = sel.selReg(insn.getSrc(valueID), insn.getCoordType());
 
       for (; valueID < insn.getSrcNum(); ++valueID)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 9fd4247..8980abf 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -414,6 +414,7 @@ namespace ir {
             << " sampler %" << this->getSrc(fn, 1)
             << " coord u %" << this->getSrc(fn, 2)
             << " coord v %" << this->getSrc(fn, 3)
+            << " coord w %" << this->getSrc(fn, 4)
             << " %" << this->getDst(fn, 0)
             << " %" << this->getDst(fn, 1)
             << " %" << this->getDst(fn, 2)
@@ -427,7 +428,7 @@ namespace ir {
       INLINE Type getSrcType(void) const { return this->srcType; }
       INLINE Type getDstType(void) const { return this->dstType; }
 
-      static const uint32_t srcNum = 4;
+      static const uint32_t srcNum = 5;
       static const uint32_t dstNum = 4;
     };
 
@@ -451,10 +452,11 @@ namespace ir {
             << " surface id %" << this->getSrc(fn, 0)
             << " coord u %" << this->getSrc(fn, 1)
             << " coord v %" << this->getSrc(fn, 2)
-            << " %" << this->getSrc(fn, 3)
+            << " coord w %" << this->getSrc(fn, 3)
             << " %" << this->getSrc(fn, 4)
             << " %" << this->getSrc(fn, 5)
-            << " %" << this->getSrc(fn, 6);
+            << " %" << this->getSrc(fn, 6)
+            << " %" << this->getSrc(fn, 7);
       }
 
       Tuple src;
@@ -463,8 +465,8 @@ namespace ir {
 
       INLINE Type getSrcType(void) const { return this->srcType; }
       INLINE Type getCoordType(void) const { return this->coordType; }
-      // bti, u, v, 4 data elements
-      static const uint32_t srcNum = 7;
+      // bti, u, v, w, 4 data elements
+      static const uint32_t srcNum = 8;
       Register dst[0];               //!< No dest register
     };
 
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 637e7be..ad465e2 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -102,13 +102,15 @@
 #if !defined(LLVM_VERSION_MAJOR) || (LLVM_VERSION_MINOR == 1)
 #include "llvm/Target/TargetData.h"
 #endif
+#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR == 2)
 #include "llvm/DataLayout.h"
+#endif
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
-#ifdef LLVM_32
+#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR <= 2)
 #include "llvm/Support/InstVisitor.h"
 #else
 #include "llvm/InstVisitor.h"
@@ -869,7 +871,13 @@ namespace gbe
       Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
 
       // Insert a new register for each function argument
+#if LLVM_VERSION_MINOR <= 1
+      const AttrListPtr &PAL = F.getAttributes();
+      uint32_t argID = 1; // Start at one actually
+      for (; I != E; ++I, ++argID) {
+#else
       for (; I != E; ++I) {
+#endif /* LLVM_VERSION_MINOR <= 1 */
         const std::string &argName = I->getName().str();
         Type *type = I->getType();
         GBE_ASSERTM(isScalarType(type) == true,
@@ -1820,6 +1828,12 @@ namespace gbe
       case GEN_OCL_WRITE_IMAGE3:
       case GEN_OCL_WRITE_IMAGE4:
       case GEN_OCL_WRITE_IMAGE5:
+      case GEN_OCL_WRITE_IMAGE10:
+      case GEN_OCL_WRITE_IMAGE11:
+      case GEN_OCL_WRITE_IMAGE12:
+      case GEN_OCL_WRITE_IMAGE13:
+      case GEN_OCL_WRITE_IMAGE14:
+      case GEN_OCL_WRITE_IMAGE15:
         break;
       case GEN_OCL_READ_IMAGE0:
       case GEN_OCL_READ_IMAGE1:
@@ -1827,6 +1841,12 @@ namespace gbe
       case GEN_OCL_READ_IMAGE3:
       case GEN_OCL_READ_IMAGE4:
       case GEN_OCL_READ_IMAGE5:
+      case GEN_OCL_READ_IMAGE10:
+      case GEN_OCL_READ_IMAGE11:
+      case GEN_OCL_READ_IMAGE12:
+      case GEN_OCL_READ_IMAGE13:
+      case GEN_OCL_READ_IMAGE14:
+      case GEN_OCL_READ_IMAGE15:
       {
       // dst is a 4 elements vector. We allocate all 4 registers here.
         uint32_t elemNum;
@@ -1962,11 +1982,26 @@ namespace gbe
           case GEN_OCL_READ_IMAGE3:
           case GEN_OCL_READ_IMAGE4:
           case GEN_OCL_READ_IMAGE5:
+          case GEN_OCL_READ_IMAGE10:
+          case GEN_OCL_READ_IMAGE11:
+          case GEN_OCL_READ_IMAGE12:
+          case GEN_OCL_READ_IMAGE13:
+          case GEN_OCL_READ_IMAGE14:
+          case GEN_OCL_READ_IMAGE15:
           {
             GBE_ASSERT(AI != AE); const ir::Register surface_id = this->getRegister(*AI); ++AI;
             GBE_ASSERT(AI != AE); const ir::Register sampler = this->getRegister(*AI); ++AI;
             GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI;
             GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI;
+            ir::Register wcoord;
+            if (it->second == GEN_OCL_READ_IMAGE10 ||
+                it->second == GEN_OCL_READ_IMAGE11 ||
+                it->second == GEN_OCL_READ_IMAGE12 ||
+                it->second == GEN_OCL_READ_IMAGE13 ||
+                it->second == GEN_OCL_READ_IMAGE14) {
+              GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI;
+            } else
+              wcoord = ir::Register(0);
 
             vector<ir::Register> dstTupleData, srcTupleData;
             const uint32_t elemNum = 4;
@@ -1978,26 +2013,33 @@ namespace gbe
             srcTupleData.push_back(sampler);
             srcTupleData.push_back(ucoord);
             srcTupleData.push_back(vcoord);
+            srcTupleData.push_back(wcoord);
             const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], elemNum);
-            const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 4);
+            const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 5);
 
             ir::Type srcType = ir::TYPE_U32, dstType = ir::TYPE_U32;
 
             switch(it->second) {
               case GEN_OCL_READ_IMAGE0:
               case GEN_OCL_READ_IMAGE2:
+              case GEN_OCL_READ_IMAGE10:
+              case GEN_OCL_READ_IMAGE12:
                 srcType = dstType = ir::TYPE_U32;
                 break;
               case GEN_OCL_READ_IMAGE1:
               case GEN_OCL_READ_IMAGE3:
+              case GEN_OCL_READ_IMAGE11:
+              case GEN_OCL_READ_IMAGE13:
                 dstType = ir::TYPE_U32;
                 srcType = ir::TYPE_FLOAT;
                 break;
               case GEN_OCL_READ_IMAGE4:
+              case GEN_OCL_READ_IMAGE14:
                 dstType = ir::TYPE_FLOAT;
                 srcType = ir::TYPE_U32;
                 break;
               case GEN_OCL_READ_IMAGE5:
+              case GEN_OCL_READ_IMAGE15:
                 srcType = dstType = ir::TYPE_FLOAT;
                 break;
               default:
@@ -2013,41 +2055,63 @@ namespace gbe
           case GEN_OCL_WRITE_IMAGE3:
           case GEN_OCL_WRITE_IMAGE4:
           case GEN_OCL_WRITE_IMAGE5:
+          case GEN_OCL_WRITE_IMAGE10:
+          case GEN_OCL_WRITE_IMAGE11:
+          case GEN_OCL_WRITE_IMAGE12:
+          case GEN_OCL_WRITE_IMAGE13:
+          case GEN_OCL_WRITE_IMAGE14:
+          case GEN_OCL_WRITE_IMAGE15:
           {
             GBE_ASSERT(AI != AE); const ir::Register surface_id = this->getRegister(*AI); ++AI;
             GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI;
             GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI;
+            ir::Register wcoord;
+            if(it->second == GEN_OCL_WRITE_IMAGE10 ||
+               it->second == GEN_OCL_WRITE_IMAGE11 ||
+               it->second == GEN_OCL_WRITE_IMAGE12 ||
+               it->second == GEN_OCL_WRITE_IMAGE13 ||
+               it->second == GEN_OCL_WRITE_IMAGE14) {
+              GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI;
+            } else
+              wcoord = ir::Register(0);
             GBE_ASSERT(AI != AE);
             vector<ir::Register> srcTupleData;
 
             srcTupleData.push_back(surface_id);
             srcTupleData.push_back(ucoord);
             srcTupleData.push_back(vcoord);
+            srcTupleData.push_back(wcoord);
 
             const uint32_t elemNum = 4;
             for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
               const ir::Register reg = this->getRegister(*AI, elemID);
               srcTupleData.push_back(reg);
             }
-            const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 7);
+            const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 8);
 
             ir::Type srcType = ir::TYPE_U32, coordType = ir::TYPE_U32;
 
             switch(it->second) {
               case GEN_OCL_WRITE_IMAGE0:
               case GEN_OCL_WRITE_IMAGE2:
+              case GEN_OCL_WRITE_IMAGE10:
+              case GEN_OCL_WRITE_IMAGE12:
                 srcType = coordType = ir::TYPE_U32;
                 break;
               case GEN_OCL_WRITE_IMAGE1:
               case GEN_OCL_WRITE_IMAGE3:
+              case GEN_OCL_WRITE_IMAGE11:
+              case GEN_OCL_WRITE_IMAGE13:
                 coordType = ir::TYPE_FLOAT;
                 srcType = ir::TYPE_U32;
                 break;
               case GEN_OCL_WRITE_IMAGE4:
+              case GEN_OCL_WRITE_IMAGE14:
                 srcType = ir::TYPE_FLOAT;
                 coordType = ir::TYPE_U32;
                 break;
               case GEN_OCL_WRITE_IMAGE5:
+              case GEN_OCL_WRITE_IMAGE15:
                 srcType = coordType = ir::TYPE_FLOAT;
                 break;
               default:
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 6b2e08b..055afa6 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -48,6 +48,13 @@ DECL_LLVM_GEN_FUNCTION(READ_IMAGE3, _Z22__gen_ocl_read_imageuijjff)
 DECL_LLVM_GEN_FUNCTION(READ_IMAGE4, _Z21__gen_ocl_read_imagefjjii)
 DECL_LLVM_GEN_FUNCTION(READ_IMAGE5, _Z21__gen_ocl_read_imagefjjff)
 
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE10, _Z21__gen_ocl_read_imageijjiii)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE11, _Z21__gen_ocl_read_imageijjfff)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE12, _Z22__gen_ocl_read_imageuijjiii)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE13, _Z22__gen_ocl_read_imageuijjfff)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE14, _Z21__gen_ocl_read_imagefjjiii)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE15, _Z21__gen_ocl_read_imagefjjfff)
+
 // To write_image functions.
 DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE0, _Z22__gen_ocl_write_imageijiiDv4_i)
 DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE1, _Z22__gen_ocl_write_imageijffDv4_i)
@@ -56,6 +63,13 @@ DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE5, _Z22__gen_ocl_write_imagefjffDv4_f)
 DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE2, _Z23__gen_ocl_write_imageuijiiDv4_j)
 DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE3, _Z23__gen_ocl_write_imageuijffDv4_j)
 
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE10, _Z22__gen_ocl_write_imageijiiiDv4_i)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE11, _Z22__gen_ocl_write_imageijfffDv4_i)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE12, _Z23__gen_ocl_write_imageuijiiiDv4_j)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE13, _Z23__gen_ocl_write_imageuijfffDv4_j)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE14, _Z22__gen_ocl_write_imagefjiiiDv4_f)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE15, _Z22__gen_ocl_write_imagefjfffDv4_f)
+
 // saturation related functions.
 DECL_LLVM_GEN_FUNCTION(SADD_SAT_CHAR, _Z12ocl_sadd_satcc)
 DECL_LLVM_GEN_FUNCTION(SADD_SAT_SHORT, _Z12ocl_sadd_satss)
diff --git a/backend/src/llvm/llvm_passes.cpp b/backend/src/llvm/llvm_passes.cpp
index 1a7a658..40c0e62 100644
--- a/backend/src/llvm/llvm_passes.cpp
+++ b/backend/src/llvm/llvm_passes.cpp
@@ -61,13 +61,15 @@
 #if !defined(LLVM_VERSION_MAJOR) || (LLVM_VERSION_MINOR == 1)
 #include "llvm/Target/TargetData.h"
 #endif
+#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR == 2)
 #include "llvm/DataLayout.h"
+#endif
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
-#ifdef LLVM_32
+#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR <= 2)
 #include "llvm/Support/InstVisitor.h"
 #else
 #include "llvm/InstVisitor.h"
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index a21f332..4c0d39c 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -775,6 +775,14 @@ OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, uint sampler, int u,
 OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, uint sampler, float u, float v);
 OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, uint sampler, int u, int v);
 OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, uint sampler, float u, float v);
+
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, uint sampler, int u, int v, int w);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, uint sampler, float u, float v, float w);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, uint sampler, int u, int v, int w);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, uint sampler, float u, float v, float w);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, uint sampler, int u, int v, int w);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, uint sampler, float u, float v, float w);
+
 OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int4 color);
 OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, float u, float v, int4 color);
 OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, uint4 color);
@@ -782,6 +790,13 @@ OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, float u, float v, uin
 OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, float4 color);
 OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, float u, float v, float4 color);
 
+OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int w, int4 color);
+OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, float u, float v, float w, int4 color);
+OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, int w, uint4 color);
+OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, float u, float v, float w, uint4 color);
+OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, int w, float4 color);
+OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, float u, float v, float w, float4 color);
+
 #define GET_IMAGE(cl_image, surface_id) \
     uint surface_id = (uint)cl_image
 
@@ -809,10 +824,39 @@ DECL_IMAGE(int4, i)
 DECL_IMAGE(uint4, ui)
 DECL_IMAGE(float4, f)
 
-#undef GET_IMAGE
 #undef DECL_IMAGE
 #undef DECL_READ_IMAGE
 #undef DECL_WRITE_IMAGE
+
+#define DECL_READ_IMAGE(type, suffix, coord_type) \
+  INLINE_OVERLOADABLE type read_image ## suffix(image3d_t cl_image, sampler_t sampler, coord_type coord) \
+  {\
+    GET_IMAGE(cl_image, surface_id);\
+    return __gen_ocl_read_image ## suffix(surface_id, (uint)sampler, coord.s0, coord.s1, coord.s2);\
+  }
+
+#define DECL_WRITE_IMAGE(type, suffix, coord_type) \
+  INLINE_OVERLOADABLE void write_image ## suffix(image3d_t cl_image, coord_type coord, type color)\
+  {\
+    GET_IMAGE(cl_image, surface_id);\
+    __gen_ocl_write_image ## suffix(surface_id, coord.s0, coord.s1, coord.s2, color);\
+  }
+
+#define DECL_IMAGE(type, suffix)        \
+  DECL_READ_IMAGE(type, suffix, int4)   \
+  DECL_READ_IMAGE(type, suffix, float4) \
+  DECL_WRITE_IMAGE(type, suffix, int4)   \
+  DECL_WRITE_IMAGE(type, suffix, float4)
+
+DECL_IMAGE(int4, i)
+DECL_IMAGE(uint4, ui)
+DECL_IMAGE(float4, f)
+
+#undef DECL_IMAGE
+#undef DECL_READ_IMAGE
+#undef DECL_WRITE_IMAGE
+
+#undef GET_IMAGE
 #undef INLINE_OVERLOADABLE
 
 #undef PURE
diff --git a/kernels/test_copy_image_3d.cl b/kernels/test_copy_image_3d.cl
new file mode 100644
index 0000000..766227a
--- /dev/null
+++ b/kernels/test_copy_image_3d.cl
@@ -0,0 +1,11 @@
+__kernel void
+test_copy_image_3d(__read_only image3d_t src, __write_only image3d_t dst, sampler_t sampler)
+{
+  int4 coord;
+  int4 color;
+  coord.x = (int)get_global_id(0);
+  coord.y = (int)get_global_id(1);
+  coord.z = 0;
+  color = read_imagei(src, sampler, coord);
+  write_imagei(dst, coord, color);
+}
diff --git a/kernels/test_fill_image_3d.cl b/kernels/test_fill_image_3d.cl
new file mode 100644
index 0000000..0f0c6fd
--- /dev/null
+++ b/kernels/test_fill_image_3d.cl
@@ -0,0 +1,14 @@
+__kernel void
+test_fill_image_3d(__write_only image3d_t dst, uint color)
+{
+  int4 coord;
+  int4 color4;
+  color4.s0  = (color >> 24) & 0xFF;
+  color4.s1  = (color >> 16) & 0xFF;
+  color4.s2  = (color >> 8) & 0xFF;
+  color4.s3  = color & 0xFF;
+  coord.x = (int)get_global_id(0);
+  coord.y = (int)get_global_id(1);
+  coord.z = 0;
+  write_imagei(dst, coord, color4);
+}
diff --git a/kernels/test_fill_image_3d_2.cl b/kernels/test_fill_image_3d_2.cl
new file mode 100644
index 0000000..22b6452
--- /dev/null
+++ b/kernels/test_fill_image_3d_2.cl
@@ -0,0 +1,10 @@
+__kernel void
+test_fill_image_3d_2(__write_only image3d_t dst)
+{
+  int4 coord;
+  int4 color4 = {0x12, 0x34, 0x56, 0x78};
+  coord.x = (int)get_global_id(0);
+  coord.y = (int)get_global_id(1);
+  coord.z = 0;
+  write_imagei(dst, coord, color4);
+}
diff --git a/src/cl_api.c b/src/cl_api.c
index 03cc0e6..cfbb44f 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -159,7 +159,24 @@ clGetContextInfo(cl_context      context,
                  void *          param_value,
                  size_t *        param_value_size_ret)
 {
-  NOT_IMPLEMENTED;
+  switch (param_name) {
+    case CL_CONTEXT_DEVICES:
+      if (param_value) {
+        if (param_value_size < sizeof(cl_device_id))
+          return CL_INVALID_VALUE;
+          cl_device_id *device_list = (cl_device_id*)param_value;
+          device_list[0] = context->device;
+          if (param_value_size_ret)
+            *param_value_size_ret = sizeof(cl_device_id);
+          return CL_SUCCESS;
+        }
+        if (param_value_size_ret) {
+          *param_value_size_ret = sizeof(cl_device_id);
+          return CL_SUCCESS;
+        }
+    default:
+      NOT_IMPLEMENTED;
+  }
   return 0;
 }
 
@@ -326,8 +343,28 @@ clCreateImage3D(cl_context              context,
                 void *                  host_ptr,
                 cl_int *                errcode_ret)
 {
-  NOT_IMPLEMENTED;
-  return NULL;
+  cl_mem mem = NULL;
+  cl_int err = CL_SUCCESS;
+  CHECK_CONTEXT (context);
+  cl_image_desc image_desc;
+
+  image_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
+  image_desc.image_width = image_width;
+  image_desc.image_height = image_height;
+  image_desc.image_depth = image_depth;
+  image_desc.image_row_pitch = image_row_pitch;
+  image_desc.image_slice_pitch = image_slice_pitch;
+
+  mem = cl_mem_new_image(context,
+                         flags,
+                         image_format,
+                         &image_desc,
+                         host_ptr,
+                         errcode_ret);
+error:
+  if (errcode_ret)
+    *errcode_ret = err;
+  return mem;
 }
 
 cl_int
@@ -1056,13 +1093,6 @@ clEnqueueNDRangeKernel(cl_command_queue  command_queue,
     goto error;
   }
 
-  /* Local size must be non-null */
-  for (i = 0; i < work_dim; ++i)
-    if (UNLIKELY(local_work_size[i] == 0)) {
-      err = CL_INVALID_WORK_GROUP_SIZE;
-      goto error;
-    }
-
   /* Check offset values. We add a non standard restriction. The offsets must
    * also be evenly divided by the local sizes
    */
@@ -1072,16 +1102,16 @@ clEnqueueNDRangeKernel(cl_command_queue  command_queue,
         err = CL_INVALID_GLOBAL_OFFSET;
         goto error;
       }
-      if (UNLIKELY(global_work_offset[i] % local_work_size[i])) {
+      if (UNLIKELY(local_work_size != NULL && global_work_offset[i] % local_work_size[i])) {
         err = CL_INVALID_GLOBAL_OFFSET;
         goto error;
       }
     }
 
-  /* Local sizes must divide global sizes */
+  /* Local sizes must be non-null and divide global sizes */
   if (local_work_size != NULL) 
     for (i = 0; i < work_dim; ++i) 
-      if (UNLIKELY(global_work_size[i] % local_work_size[i])) {
+      if (UNLIKELY(local_work_size[i] == 0 || global_work_size[i] % local_work_size[i])) {
         err = CL_INVALID_WORK_GROUP_SIZE;
         goto error;
       }
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 690e5cf..6950590 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -307,8 +307,9 @@ _cl_mem_new_image(cl_context ctx,
                   const cl_mem_object_type image_type,
                   size_t w,
                   size_t h,
+                  size_t depth,
                   size_t pitch,
-                  int depth,
+                  size_t slice_pitch,
                   void *data,
                   cl_int *errcode_ret)
 {
@@ -356,6 +357,27 @@ _cl_mem_new_image(cl_context ctx,
     /* Pick up tiling mode (we do only linear on SNB) */
     if (cl_driver_get_ver(ctx->drv) != 6)
       tiling = CL_TILE_Y;
+    depth = 1;
+  }
+
+  if (image_type == CL_MEM_OBJECT_IMAGE3D) {
+    size_t min_pitch = bpp * w;
+    if (data && pitch == 0)
+      pitch = min_pitch;
+    size_t min_slice_pitch = min_pitch * h;
+    if (data && slice_pitch == 0)
+      slice_pitch = min_slice_pitch;
+    if (UNLIKELY(w > ctx->device->image3d_max_width)) DO_IMAGE_ERROR;
+    if (UNLIKELY(h > ctx->device->image3d_max_height)) DO_IMAGE_ERROR;
+    if (UNLIKELY(depth > ctx->device->image3d_max_depth)) DO_IMAGE_ERROR;
+    if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
+    if (UNLIKELY(data && min_slice_pitch > slice_pitch)) DO_IMAGE_ERROR;
+    if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
+    if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR;
+
+    /* Pick up tiling mode (we do only linear on SNB) */
+    if (cl_driver_get_ver(ctx->drv) != 6)
+      tiling = CL_TILE_Y;
   }
 #undef DO_IMAGE_ERROR
 
@@ -371,7 +393,7 @@ _cl_mem_new_image(cl_context ctx,
     aligned_h     = ALIGN(h, tiley_h);
   }
 
-  sz = aligned_pitch * aligned_h;
+  sz = aligned_pitch * aligned_h * depth;
   mem = cl_mem_allocate(ctx, flags, sz, tiling != CL_NO_TILE, &err);
   if (mem == NULL || err != CL_SUCCESS)
     goto error;
@@ -417,11 +439,11 @@ cl_mem_new_image(cl_context context,
   switch (image_desc->image_type) {
   case CL_MEM_OBJECT_IMAGE1D:
   case CL_MEM_OBJECT_IMAGE2D:
+  case CL_MEM_OBJECT_IMAGE3D:
     return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
-                             image_desc->image_width, image_desc->image_height,
-                             image_desc->image_row_pitch, image_desc->image_depth,
+                             image_desc->image_width, image_desc->image_height, image_desc->image_depth,
+                             image_desc->image_row_pitch, image_desc->image_slice_pitch,
                              host_ptr, errcode_ret);
-  case CL_MEM_OBJECT_IMAGE3D:
   case CL_MEM_OBJECT_IMAGE2D_ARRAY:
   case CL_MEM_OBJECT_IMAGE1D_ARRAY:
   case CL_MEM_OBJECT_IMAGE1D_BUFFER:
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 1275faa..20d5456 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -23,10 +23,13 @@ set (utests_sources
   compiler_convert_uchar_sat.cpp
   compiler_copy_buffer.cpp
   compiler_copy_image.cpp
+  compiler_copy_image_3d.cpp
   compiler_copy_buffer_row.cpp
   compiler_fabs.cpp
   compiler_fill_image.cpp
   compiler_fill_image0.cpp
+  compiler_fill_image_3d.cpp
+  compiler_fill_image_3d_2.cpp
   compiler_function_argument0.cpp
   compiler_function_argument1.cpp
   compiler_function_argument.cpp
diff --git a/utests/compiler_copy_image_3d.cpp b/utests/compiler_copy_image_3d.cpp
new file mode 100644
index 0000000..9816fe4
--- /dev/null
+++ b/utests/compiler_copy_image_3d.cpp
@@ -0,0 +1,55 @@
+#include "utest_helper.hpp"
+
+static void compiler_copy_image_3d(void)
+{
+  const size_t w = 512;
+  const size_t h = 512;
+  const size_t depth = 1;
+  cl_image_format format;
+  cl_image_desc desc;
+  cl_sampler sampler;
+
+  // Setup kernel and images
+  OCL_CREATE_KERNEL("test_copy_image_3d");
+  buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * w * h * depth);
+  for (uint32_t k = 0; k < depth; k++)
+    for (uint32_t j = 0; j < h; j++)
+      for (uint32_t i = 0; i < w; i++)
+        ((uint32_t*)buf_data[0])[k*w*h + j*w + i] = k*w*h + j*w + i;
+
+  format.image_channel_order = CL_RGBA;
+  format.image_channel_data_type = CL_UNSIGNED_INT8;
+  desc.image_type = CL_MEM_OBJECT_IMAGE3D;
+  desc.image_width = w;
+  desc.image_height = h;
+  desc.image_depth = depth;
+  desc.image_row_pitch = 0;
+  desc.image_slice_pitch = 0;
+  OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, buf_data[0]);
+  OCL_CREATE_IMAGE(buf[1], 0, &format, &desc, NULL);
+  OCL_CREATE_SAMPLER(sampler, CL_ADDRESS_REPEAT, CL_FILTER_NEAREST);
+  free(buf_data[0]);
+  buf_data[0] = NULL;
+
+  // Run the kernel
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(sampler), &sampler);
+  globals[0] = w;
+  globals[1] = h;
+  locals[0] = 16;
+  locals[1] = 16;
+  OCL_NDRANGE(2);
+
+  // Check result
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  for (uint32_t k = 0; k < depth; k++)
+    for (uint32_t j = 0; j < h; ++j)
+      for (uint32_t i = 0; i < w; i++)
+        OCL_ASSERT(((uint32_t*)buf_data[0])[k*w*h + j*w + i] == ((uint32_t*)buf_data[1])[k*w*h + j*w + i]);
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_copy_image_3d);
diff --git a/utests/compiler_fill_image_3d.cpp b/utests/compiler_fill_image_3d.cpp
new file mode 100644
index 0000000..5d98b0b
--- /dev/null
+++ b/utests/compiler_fill_image_3d.cpp
@@ -0,0 +1,44 @@
+#include "utest_helper.hpp"
+
+static void compiler_fill_image_3d(void)
+{
+  const size_t w = 512;
+  const size_t h = 512;
+  const size_t depth = 1;
+  uint32_t color = 0x12345678;
+  cl_image_format format;
+  cl_image_desc desc;
+
+  format.image_channel_order = CL_RGBA;
+  format.image_channel_data_type = CL_UNSIGNED_INT8;
+  desc.image_type = CL_MEM_OBJECT_IMAGE3D;
+  desc.image_width = w;
+  desc.image_height = h;
+  desc.image_depth = depth;
+  desc.image_row_pitch = 0;
+  desc.image_slice_pitch = 0;
+
+  // Setup kernel and images
+  OCL_CREATE_KERNEL("test_fill_image_3d");
+
+  OCL_CREATE_IMAGE(buf[0], 0, &format, &desc, NULL);
+
+  // Run the kernel
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(color), &color);
+  globals[0] = w;
+  globals[1] = h;
+  locals[0] = 16;
+  locals[1] = 16;
+  OCL_NDRANGE(2);
+
+  // Check result
+  OCL_MAP_BUFFER(0);
+  for (uint32_t k = 0; k < depth; k++)
+    for (uint32_t j = 0; j < h; ++j)
+      for (uint32_t i = 0; i < w; i++)
+        OCL_ASSERT(((uint32_t*)buf_data[0])[k*w*h + j*w + i] == 0x78563412);
+  OCL_UNMAP_BUFFER(0);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_fill_image_3d);
diff --git a/utests/compiler_fill_image_3d_2.cpp b/utests/compiler_fill_image_3d_2.cpp
new file mode 100644
index 0000000..3b4edb9
--- /dev/null
+++ b/utests/compiler_fill_image_3d_2.cpp
@@ -0,0 +1,42 @@
+#include "utest_helper.hpp"
+
+static void compiler_fill_image_3d_2(void)
+{
+  const size_t w = 512;
+  const size_t h = 512;
+  const size_t depth = 1;
+  cl_image_format format;
+  cl_image_desc desc;
+
+  format.image_channel_order = CL_RGBA;
+  format.image_channel_data_type = CL_UNSIGNED_INT8;
+  desc.image_type = CL_MEM_OBJECT_IMAGE3D;
+  desc.image_width = w;
+  desc.image_height = h;
+  desc.image_depth = depth;
+  desc.image_row_pitch = 0;
+  desc.image_slice_pitch = 0;
+
+  // Setup kernel and images
+  OCL_CREATE_KERNEL("test_fill_image_3d_2");
+
+  OCL_CREATE_IMAGE(buf[0], 0, &format, &desc, NULL);
+
+  // Run the kernel
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  globals[0] = w;
+  globals[1] = h;
+  locals[0] = 16;
+  locals[1] = 16;
+  OCL_NDRANGE(2);
+
+  // Check result
+  OCL_MAP_BUFFER(0);
+  for (uint32_t k = 0; k < depth; k++)
+    for (uint32_t j = 0; j < h; ++j)
+      for (uint32_t i = 0; i < w; i++)
+        OCL_ASSERT(((uint32_t*)buf_data[0])[k*w*h + j*w + i] == 0x78563412);
+  OCL_UNMAP_BUFFER(0);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_fill_image_3d_2);

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/beignet.git



More information about the Pkg-opencl-devel mailing list