[Pkg-opencl-devel] [beignet] 23/66: Imported Upstream version 0.1+git20130514+19e9c58
Andreas Beckmann
anbe at moszumanska.debian.org
Fri Oct 31 07:27:04 UTC 2014
This is an automated email from the git hooks/post-receive script.
anbe pushed a commit to branch master
in repository beignet.
commit f4bef4e7865b371a0b22d376800cf80f1485b641
Author: Simon Richter <sjr at debian.org>
Date: Tue May 14 20:04:29 2013 +0200
Imported Upstream version 0.1+git20130514+19e9c58
---
CMakeLists.txt | 4 +-
backend/src/backend/gen_context.cpp | 14 ++++--
backend/src/backend/gen_insn_selection.cpp | 37 ++++++++--------
backend/src/ir/instruction.cpp | 12 ++---
backend/src/llvm/llvm_gen_backend.cpp | 70 ++++++++++++++++++++++++++++--
backend/src/llvm/llvm_gen_ocl_function.hxx | 14 ++++++
backend/src/llvm/llvm_passes.cpp | 4 +-
backend/src/ocl_stdlib.h | 46 +++++++++++++++++++-
kernels/test_copy_image_3d.cl | 11 +++++
kernels/test_fill_image_3d.cl | 14 ++++++
kernels/test_fill_image_3d_2.cl | 10 +++++
src/cl_api.c | 56 ++++++++++++++++++------
src/cl_mem.c | 32 +++++++++++---
utests/CMakeLists.txt | 3 ++
utests/compiler_copy_image_3d.cpp | 55 +++++++++++++++++++++++
utests/compiler_fill_image_3d.cpp | 44 +++++++++++++++++++
utests/compiler_fill_image_3d_2.cpp | 42 ++++++++++++++++++
17 files changed, 416 insertions(+), 52 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4b402ee..6d0d291 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -55,8 +55,8 @@ ELSE (USE_FULSIM)
ADD_DEFINITIONS(-DUSE_FULSIM=0)
ENDIF (USE_FULSIM)
-SET(CMAKE_CXX_FLAGS "-Wall -Wno-invalid-offsetof -mfpmath=sse -fno-rtti -Wcast-align -std=c++0x -msse2 -msse3 -mssse3 -msse4.1 ")
-SET(CMAKE_C_FLAGS "-Wall -mfpmath=sse -msse2 -Wcast-align -msse2 -msse3 -mssse3 -msse4.1")
+SET(CMAKE_CXX_FLAGS "-Wall -Wno-invalid-offsetof -mfpmath=sse -fno-rtti -Wcast-align -std=c++0x -msse2 -msse3 -mssse3 -msse4.1 ${CMAKE_CXX_FLAGS}")
+SET(CMAKE_C_FLAGS "-Wall -mfpmath=sse -msse2 -Wcast-align -msse2 -msse3 -mssse3 -msse4.1 ${CMAKE_C_FLAGS}")
# Front end stuff we need
#INCLUDE(CMake/FindLLVM.cmake)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 1f867b8..4a16cae 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -291,6 +291,7 @@ namespace gbe
const GenRegister sampler = ra->genReg(insn.src(5));
const GenRegister ucoord = ra->genReg(insn.src(6));
const GenRegister vcoord = ra->genReg(insn.src(7));
+ const GenRegister wcoord = ra->genReg(insn.src(8));
const GenRegister temp = GenRegister::ud1grf(msgPayload.nr, msgPayload.subnr/sizeof(float) + 4);
const GenRegister a0_0 = GenRegister::ud1arf(GEN_ARF_ADDRESS, 0);
uint32_t simdWidth = p->curr.execWidth;
@@ -309,6 +310,8 @@ namespace gbe
/* Prepare message payload. */
p->MOV(GenRegister::f8grf(nr , 0), ucoord);
p->MOV(GenRegister::f8grf(nr + (simdWidth/8), 0), vcoord);
+ if (insn.src(8).reg() != 0)
+ p->MOV(GenRegister::f8grf(nr + (simdWidth/4), 0), wcoord);
p->SAMPLE(dst, msgPayload, a0_0, -1, 0);
p->pop();
@@ -319,10 +322,11 @@ namespace gbe
const GenRegister bti = ra->genReg(insn.src(0 + insn.extra.elem));
const GenRegister ucoord = ra->genReg(insn.src(1 + insn.extra.elem));
const GenRegister vcoord = ra->genReg(insn.src(2 + insn.extra.elem));
- const GenRegister R = ra->genReg(insn.src(3 + insn.extra.elem));
- const GenRegister G = ra->genReg(insn.src(4 + insn.extra.elem));
- const GenRegister B = ra->genReg(insn.src(5 + insn.extra.elem));
- const GenRegister A = ra->genReg(insn.src(6 + insn.extra.elem));
+ const GenRegister wcoord = ra->genReg(insn.src(3 + insn.extra.elem));
+ const GenRegister R = ra->genReg(insn.src(4 + insn.extra.elem));
+ const GenRegister G = ra->genReg(insn.src(5 + insn.extra.elem));
+ const GenRegister B = ra->genReg(insn.src(6 + insn.extra.elem));
+ const GenRegister A = ra->genReg(insn.src(7 + insn.extra.elem));
const GenRegister a0_0 = GenRegister::ud1arf(GEN_ARF_ADDRESS, 0);
p->push();
@@ -357,6 +361,8 @@ namespace gbe
GenRegister::retype(GenRegister::QnPhysical(src,quarter), src.type))
QUARTER_MOV0(nr + 1, ucoord);
QUARTER_MOV0(nr + 2, vcoord);
+ if (insn.src(3 + insn.extra.elem).reg() != 0)
+ QUARTER_MOV0(nr + 3, wcoord);
QUARTER_MOV1(nr + 5, R);
QUARTER_MOV1(nr + 6, G);
QUARTER_MOV1(nr + 7, B);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index ee8a0ab..2bd9aca 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -466,7 +466,7 @@ namespace gbe
/*! Encode ternary instructions */
void ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2);
/*! Encode sample instructions */
- void SAMPLE(GenRegister *dst, GenRegister *src, GenRegister *msgPayloads);
+ void SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *src, uint32_t srcNum, GenRegister *msgPayloads, uint32_t msgNum);
/*! Encode typed write instructions */
void TYPED_WRITE(GenRegister *src, uint32_t srcNum, GenRegister *msgs, uint32_t msgNum);
/*! Use custom allocators */
@@ -965,26 +965,26 @@ namespace gbe
});
}
/* XXX always 4 return values? */
- void Selection::Opaque::SAMPLE(GenRegister *dst, GenRegister *src, GenRegister *msgPayloads) {
- uint32_t elemNum = 4;
- SelectionInstruction *insn = this->appendInsn(SEL_OP_SAMPLE, elemNum, 8);
+ void Selection::Opaque::SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *src, uint32_t srcNum, GenRegister *msgPayloads, uint32_t msgNum) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_SAMPLE, dstNum, msgNum + srcNum);
SelectionVector *dstVector = this->appendVector();
SelectionVector *msgVector = this->appendVector();
// Regular instruction to encode
- for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
- {
+ for (uint32_t elemID = 0; elemID < dstNum; ++elemID)
insn->dst(elemID) = dst[elemID];
+ for (uint32_t elemID = 0; elemID < msgNum; ++elemID)
insn->src(elemID) = msgPayloads[elemID];
- insn->src(4 + elemID) = src[elemID];
- }
+ for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+ insn->src(msgNum + elemID) = src[elemID];
// Sends require contiguous allocation
- dstVector->regNum = 4;
+ dstVector->regNum = dstNum;
dstVector->isSrc = 0;
dstVector->reg = &insn->dst(0);
- msgVector->regNum = 4;
+ // Only the messages require contiguous registers.
+ msgVector->regNum = msgNum;
msgVector->isSrc = 1;
msgVector->reg = &insn->src(0);
}
@@ -1957,18 +1957,18 @@ namespace gbe
{
using namespace ir;
GenRegister msgPayloads[4];
- GenRegister dst[4], src[4];
+ GenRegister dst[insn.getDstNum()], src[insn.getSrcNum()];
for( int i = 0; i < 4; ++i)
msgPayloads[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
- for (uint32_t valueID = 0; valueID < 4; ++valueID)
- {
+ for (uint32_t valueID = 0; valueID < insn.getDstNum(); ++valueID)
dst[valueID] = sel.selReg(insn.getDst(valueID), insn.getDstType());
+
+ for (uint32_t valueID = 0; valueID < insn.getSrcNum(); ++valueID)
src[valueID] = sel.selReg(insn.getSrc(valueID), insn.getSrcType());
- }
- sel.SAMPLE(dst, src, msgPayloads);
+ sel.SAMPLE(dst, insn.getDstNum(), src, insn.getSrcNum(), msgPayloads, 4);
return true;
}
DECL_CTOR(SampleInstruction, 1, 1);
@@ -1983,8 +1983,9 @@ namespace gbe
const uint32_t simdWidth = sel.ctx.getSimdWidth();
uint32_t valueID = 0;
GenRegister msgs[9]; // (header + U + V + R + LOD + 4)
- GenRegister src[7];
+ GenRegister src[insn.getSrcNum()];
uint32_t msgNum = (8 / (simdWidth / 8)) + 1;
+ uint32_t coordNum = (insn.getSrcNum() == 7) ? 2 : 3;
for(uint32_t i = 0; i < msgNum; i++)
msgs[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
@@ -1992,8 +1993,8 @@ namespace gbe
// bti always uses TYPE_U32.
src[valueID] = sel.selReg(insn.getSrc(valueID), TYPE_U32);
valueID++;
-
- for (; valueID < 3; ++valueID)
+ // u, v, w coords should use coord type.
+ for (; valueID < 1 + coordNum; ++valueID)
src[valueID] = sel.selReg(insn.getSrc(valueID), insn.getCoordType());
for (; valueID < insn.getSrcNum(); ++valueID)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 9fd4247..8980abf 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -414,6 +414,7 @@ namespace ir {
<< " sampler %" << this->getSrc(fn, 1)
<< " coord u %" << this->getSrc(fn, 2)
<< " coord v %" << this->getSrc(fn, 3)
+ << " coord w %" << this->getSrc(fn, 4)
<< " %" << this->getDst(fn, 0)
<< " %" << this->getDst(fn, 1)
<< " %" << this->getDst(fn, 2)
@@ -427,7 +428,7 @@ namespace ir {
INLINE Type getSrcType(void) const { return this->srcType; }
INLINE Type getDstType(void) const { return this->dstType; }
- static const uint32_t srcNum = 4;
+ static const uint32_t srcNum = 5;
static const uint32_t dstNum = 4;
};
@@ -451,10 +452,11 @@ namespace ir {
<< " surface id %" << this->getSrc(fn, 0)
<< " coord u %" << this->getSrc(fn, 1)
<< " coord v %" << this->getSrc(fn, 2)
- << " %" << this->getSrc(fn, 3)
+ << " coord w %" << this->getSrc(fn, 3)
<< " %" << this->getSrc(fn, 4)
<< " %" << this->getSrc(fn, 5)
- << " %" << this->getSrc(fn, 6);
+ << " %" << this->getSrc(fn, 6)
+ << " %" << this->getSrc(fn, 7);
}
Tuple src;
@@ -463,8 +465,8 @@ namespace ir {
INLINE Type getSrcType(void) const { return this->srcType; }
INLINE Type getCoordType(void) const { return this->coordType; }
- // bti, u, v, 4 data elements
- static const uint32_t srcNum = 7;
+ // bti, u, v, w, 4 data elements
+ static const uint32_t srcNum = 8;
Register dst[0]; //!< No dest register
};
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 637e7be..ad465e2 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -102,13 +102,15 @@
#if !defined(LLVM_VERSION_MAJOR) || (LLVM_VERSION_MINOR == 1)
#include "llvm/Target/TargetData.h"
#endif
+#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR == 2)
#include "llvm/DataLayout.h"
+#endif
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
-#ifdef LLVM_32
+#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR <= 2)
#include "llvm/Support/InstVisitor.h"
#else
#include "llvm/InstVisitor.h"
@@ -869,7 +871,13 @@ namespace gbe
Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
// Insert a new register for each function argument
+#if LLVM_VERSION_MINOR <= 1
+ const AttrListPtr &PAL = F.getAttributes();
+ uint32_t argID = 1; // Start at one actually
+ for (; I != E; ++I, ++argID) {
+#else
for (; I != E; ++I) {
+#endif /* LLVM_VERSION_MINOR <= 1 */
const std::string &argName = I->getName().str();
Type *type = I->getType();
GBE_ASSERTM(isScalarType(type) == true,
@@ -1820,6 +1828,12 @@ namespace gbe
case GEN_OCL_WRITE_IMAGE3:
case GEN_OCL_WRITE_IMAGE4:
case GEN_OCL_WRITE_IMAGE5:
+ case GEN_OCL_WRITE_IMAGE10:
+ case GEN_OCL_WRITE_IMAGE11:
+ case GEN_OCL_WRITE_IMAGE12:
+ case GEN_OCL_WRITE_IMAGE13:
+ case GEN_OCL_WRITE_IMAGE14:
+ case GEN_OCL_WRITE_IMAGE15:
break;
case GEN_OCL_READ_IMAGE0:
case GEN_OCL_READ_IMAGE1:
@@ -1827,6 +1841,12 @@ namespace gbe
case GEN_OCL_READ_IMAGE3:
case GEN_OCL_READ_IMAGE4:
case GEN_OCL_READ_IMAGE5:
+ case GEN_OCL_READ_IMAGE10:
+ case GEN_OCL_READ_IMAGE11:
+ case GEN_OCL_READ_IMAGE12:
+ case GEN_OCL_READ_IMAGE13:
+ case GEN_OCL_READ_IMAGE14:
+ case GEN_OCL_READ_IMAGE15:
{
// dst is a 4 elements vector. We allocate all 4 registers here.
uint32_t elemNum;
@@ -1962,11 +1982,26 @@ namespace gbe
case GEN_OCL_READ_IMAGE3:
case GEN_OCL_READ_IMAGE4:
case GEN_OCL_READ_IMAGE5:
+ case GEN_OCL_READ_IMAGE10:
+ case GEN_OCL_READ_IMAGE11:
+ case GEN_OCL_READ_IMAGE12:
+ case GEN_OCL_READ_IMAGE13:
+ case GEN_OCL_READ_IMAGE14:
+ case GEN_OCL_READ_IMAGE15:
{
GBE_ASSERT(AI != AE); const ir::Register surface_id = this->getRegister(*AI); ++AI;
GBE_ASSERT(AI != AE); const ir::Register sampler = this->getRegister(*AI); ++AI;
GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI;
GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI;
+ ir::Register wcoord;
+ if (it->second == GEN_OCL_READ_IMAGE10 ||
+ it->second == GEN_OCL_READ_IMAGE11 ||
+ it->second == GEN_OCL_READ_IMAGE12 ||
+ it->second == GEN_OCL_READ_IMAGE13 ||
+ it->second == GEN_OCL_READ_IMAGE14) {
+ GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI;
+ } else
+ wcoord = ir::Register(0);
vector<ir::Register> dstTupleData, srcTupleData;
const uint32_t elemNum = 4;
@@ -1978,26 +2013,33 @@ namespace gbe
srcTupleData.push_back(sampler);
srcTupleData.push_back(ucoord);
srcTupleData.push_back(vcoord);
+ srcTupleData.push_back(wcoord);
const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], elemNum);
- const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 4);
+ const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 5);
ir::Type srcType = ir::TYPE_U32, dstType = ir::TYPE_U32;
switch(it->second) {
case GEN_OCL_READ_IMAGE0:
case GEN_OCL_READ_IMAGE2:
+ case GEN_OCL_READ_IMAGE10:
+ case GEN_OCL_READ_IMAGE12:
srcType = dstType = ir::TYPE_U32;
break;
case GEN_OCL_READ_IMAGE1:
case GEN_OCL_READ_IMAGE3:
+ case GEN_OCL_READ_IMAGE11:
+ case GEN_OCL_READ_IMAGE13:
dstType = ir::TYPE_U32;
srcType = ir::TYPE_FLOAT;
break;
case GEN_OCL_READ_IMAGE4:
+ case GEN_OCL_READ_IMAGE14:
dstType = ir::TYPE_FLOAT;
srcType = ir::TYPE_U32;
break;
case GEN_OCL_READ_IMAGE5:
+ case GEN_OCL_READ_IMAGE15:
srcType = dstType = ir::TYPE_FLOAT;
break;
default:
@@ -2013,41 +2055,63 @@ namespace gbe
case GEN_OCL_WRITE_IMAGE3:
case GEN_OCL_WRITE_IMAGE4:
case GEN_OCL_WRITE_IMAGE5:
+ case GEN_OCL_WRITE_IMAGE10:
+ case GEN_OCL_WRITE_IMAGE11:
+ case GEN_OCL_WRITE_IMAGE12:
+ case GEN_OCL_WRITE_IMAGE13:
+ case GEN_OCL_WRITE_IMAGE14:
+ case GEN_OCL_WRITE_IMAGE15:
{
GBE_ASSERT(AI != AE); const ir::Register surface_id = this->getRegister(*AI); ++AI;
GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI;
GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI;
+ ir::Register wcoord;
+ if(it->second == GEN_OCL_WRITE_IMAGE10 ||
+ it->second == GEN_OCL_WRITE_IMAGE11 ||
+ it->second == GEN_OCL_WRITE_IMAGE12 ||
+ it->second == GEN_OCL_WRITE_IMAGE13 ||
+ it->second == GEN_OCL_WRITE_IMAGE14) {
+ GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI;
+ } else
+ wcoord = ir::Register(0);
GBE_ASSERT(AI != AE);
vector<ir::Register> srcTupleData;
srcTupleData.push_back(surface_id);
srcTupleData.push_back(ucoord);
srcTupleData.push_back(vcoord);
+ srcTupleData.push_back(wcoord);
const uint32_t elemNum = 4;
for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
const ir::Register reg = this->getRegister(*AI, elemID);
srcTupleData.push_back(reg);
}
- const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 7);
+ const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 8);
ir::Type srcType = ir::TYPE_U32, coordType = ir::TYPE_U32;
switch(it->second) {
case GEN_OCL_WRITE_IMAGE0:
case GEN_OCL_WRITE_IMAGE2:
+ case GEN_OCL_WRITE_IMAGE10:
+ case GEN_OCL_WRITE_IMAGE12:
srcType = coordType = ir::TYPE_U32;
break;
case GEN_OCL_WRITE_IMAGE1:
case GEN_OCL_WRITE_IMAGE3:
+ case GEN_OCL_WRITE_IMAGE11:
+ case GEN_OCL_WRITE_IMAGE13:
coordType = ir::TYPE_FLOAT;
srcType = ir::TYPE_U32;
break;
case GEN_OCL_WRITE_IMAGE4:
+ case GEN_OCL_WRITE_IMAGE14:
srcType = ir::TYPE_FLOAT;
coordType = ir::TYPE_U32;
break;
case GEN_OCL_WRITE_IMAGE5:
+ case GEN_OCL_WRITE_IMAGE15:
srcType = coordType = ir::TYPE_FLOAT;
break;
default:
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 6b2e08b..055afa6 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -48,6 +48,13 @@ DECL_LLVM_GEN_FUNCTION(READ_IMAGE3, _Z22__gen_ocl_read_imageuijjff)
DECL_LLVM_GEN_FUNCTION(READ_IMAGE4, _Z21__gen_ocl_read_imagefjjii)
DECL_LLVM_GEN_FUNCTION(READ_IMAGE5, _Z21__gen_ocl_read_imagefjjff)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE10, _Z21__gen_ocl_read_imageijjiii)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE11, _Z21__gen_ocl_read_imageijjfff)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE12, _Z22__gen_ocl_read_imageuijjiii)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE13, _Z22__gen_ocl_read_imageuijjfff)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE14, _Z21__gen_ocl_read_imagefjjiii)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE15, _Z21__gen_ocl_read_imagefjjfff)
+
// To write_image functions.
DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE0, _Z22__gen_ocl_write_imageijiiDv4_i)
DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE1, _Z22__gen_ocl_write_imageijffDv4_i)
@@ -56,6 +63,13 @@ DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE5, _Z22__gen_ocl_write_imagefjffDv4_f)
DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE2, _Z23__gen_ocl_write_imageuijiiDv4_j)
DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE3, _Z23__gen_ocl_write_imageuijffDv4_j)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE10, _Z22__gen_ocl_write_imageijiiiDv4_i)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE11, _Z22__gen_ocl_write_imageijfffDv4_i)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE12, _Z23__gen_ocl_write_imageuijiiiDv4_j)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE13, _Z23__gen_ocl_write_imageuijfffDv4_j)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE14, _Z22__gen_ocl_write_imagefjiiiDv4_f)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE15, _Z22__gen_ocl_write_imagefjfffDv4_f)
+
// saturation related functions.
DECL_LLVM_GEN_FUNCTION(SADD_SAT_CHAR, _Z12ocl_sadd_satcc)
DECL_LLVM_GEN_FUNCTION(SADD_SAT_SHORT, _Z12ocl_sadd_satss)
diff --git a/backend/src/llvm/llvm_passes.cpp b/backend/src/llvm/llvm_passes.cpp
index 1a7a658..40c0e62 100644
--- a/backend/src/llvm/llvm_passes.cpp
+++ b/backend/src/llvm/llvm_passes.cpp
@@ -61,13 +61,15 @@
#if !defined(LLVM_VERSION_MAJOR) || (LLVM_VERSION_MINOR == 1)
#include "llvm/Target/TargetData.h"
#endif
+#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR == 2)
#include "llvm/DataLayout.h"
+#endif
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
-#ifdef LLVM_32
+#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR <= 2)
#include "llvm/Support/InstVisitor.h"
#else
#include "llvm/InstVisitor.h"
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index a21f332..4c0d39c 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -775,6 +775,14 @@ OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, uint sampler, int u,
OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, uint sampler, float u, float v);
OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, uint sampler, int u, int v);
OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, uint sampler, float u, float v);
+
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, uint sampler, int u, int v, int w);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, uint sampler, float u, float v, float w);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, uint sampler, int u, int v, int w);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, uint sampler, float u, float v, float w);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, uint sampler, int u, int v, int w);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, uint sampler, float u, float v, float w);
+
OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int4 color);
OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, float u, float v, int4 color);
OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, uint4 color);
@@ -782,6 +790,13 @@ OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, float u, float v, uin
OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, float4 color);
OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, float u, float v, float4 color);
+OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int w, int4 color);
+OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, float u, float v, float w, int4 color);
+OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, int w, uint4 color);
+OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, float u, float v, float w, uint4 color);
+OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, int w, float4 color);
+OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, float u, float v, float w, float4 color);
+
#define GET_IMAGE(cl_image, surface_id) \
uint surface_id = (uint)cl_image
@@ -809,10 +824,39 @@ DECL_IMAGE(int4, i)
DECL_IMAGE(uint4, ui)
DECL_IMAGE(float4, f)
-#undef GET_IMAGE
#undef DECL_IMAGE
#undef DECL_READ_IMAGE
#undef DECL_WRITE_IMAGE
+
+#define DECL_READ_IMAGE(type, suffix, coord_type) \
+ INLINE_OVERLOADABLE type read_image ## suffix(image3d_t cl_image, sampler_t sampler, coord_type coord) \
+ {\
+ GET_IMAGE(cl_image, surface_id);\
+ return __gen_ocl_read_image ## suffix(surface_id, (uint)sampler, coord.s0, coord.s1, coord.s2);\
+ }
+
+#define DECL_WRITE_IMAGE(type, suffix, coord_type) \
+ INLINE_OVERLOADABLE void write_image ## suffix(image3d_t cl_image, coord_type coord, type color)\
+ {\
+ GET_IMAGE(cl_image, surface_id);\
+ __gen_ocl_write_image ## suffix(surface_id, coord.s0, coord.s1, coord.s2, color);\
+ }
+
+#define DECL_IMAGE(type, suffix) \
+ DECL_READ_IMAGE(type, suffix, int4) \
+ DECL_READ_IMAGE(type, suffix, float4) \
+ DECL_WRITE_IMAGE(type, suffix, int4) \
+ DECL_WRITE_IMAGE(type, suffix, float4)
+
+DECL_IMAGE(int4, i)
+DECL_IMAGE(uint4, ui)
+DECL_IMAGE(float4, f)
+
+#undef DECL_IMAGE
+#undef DECL_READ_IMAGE
+#undef DECL_WRITE_IMAGE
+
+#undef GET_IMAGE
#undef INLINE_OVERLOADABLE
#undef PURE
diff --git a/kernels/test_copy_image_3d.cl b/kernels/test_copy_image_3d.cl
new file mode 100644
index 0000000..766227a
--- /dev/null
+++ b/kernels/test_copy_image_3d.cl
@@ -0,0 +1,11 @@
+__kernel void
+test_copy_image_3d(__read_only image3d_t src, __write_only image3d_t dst, sampler_t sampler)
+{
+ int4 coord;
+ int4 color;
+ coord.x = (int)get_global_id(0);
+ coord.y = (int)get_global_id(1);
+ coord.z = 0;
+ color = read_imagei(src, sampler, coord);
+ write_imagei(dst, coord, color);
+}
diff --git a/kernels/test_fill_image_3d.cl b/kernels/test_fill_image_3d.cl
new file mode 100644
index 0000000..0f0c6fd
--- /dev/null
+++ b/kernels/test_fill_image_3d.cl
@@ -0,0 +1,14 @@
+__kernel void
+test_fill_image_3d(__write_only image3d_t dst, uint color)
+{
+ int4 coord;
+ int4 color4;
+ color4.s0 = (color >> 24) & 0xFF;
+ color4.s1 = (color >> 16) & 0xFF;
+ color4.s2 = (color >> 8) & 0xFF;
+ color4.s3 = color & 0xFF;
+ coord.x = (int)get_global_id(0);
+ coord.y = (int)get_global_id(1);
+ coord.z = 0;
+ write_imagei(dst, coord, color4);
+}
diff --git a/kernels/test_fill_image_3d_2.cl b/kernels/test_fill_image_3d_2.cl
new file mode 100644
index 0000000..22b6452
--- /dev/null
+++ b/kernels/test_fill_image_3d_2.cl
@@ -0,0 +1,10 @@
+__kernel void
+test_fill_image_3d_2(__write_only image3d_t dst)
+{
+ int4 coord;
+ int4 color4 = {0x12, 0x34, 0x56, 0x78};
+ coord.x = (int)get_global_id(0);
+ coord.y = (int)get_global_id(1);
+ coord.z = 0;
+ write_imagei(dst, coord, color4);
+}
diff --git a/src/cl_api.c b/src/cl_api.c
index 03cc0e6..cfbb44f 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -159,7 +159,24 @@ clGetContextInfo(cl_context context,
void * param_value,
size_t * param_value_size_ret)
{
- NOT_IMPLEMENTED;
+ switch (param_name) {
+ case CL_CONTEXT_DEVICES:
+ if (param_value) {
+ if (param_value_size < sizeof(cl_device_id))
+ return CL_INVALID_VALUE;
+ cl_device_id *device_list = (cl_device_id*)param_value;
+ device_list[0] = context->device;
+ if (param_value_size_ret)
+ *param_value_size_ret = sizeof(cl_device_id);
+ return CL_SUCCESS;
+ }
+ if (param_value_size_ret) {
+ *param_value_size_ret = sizeof(cl_device_id);
+ return CL_SUCCESS;
+ }
+ default:
+ NOT_IMPLEMENTED;
+ }
return 0;
}
@@ -326,8 +343,28 @@ clCreateImage3D(cl_context context,
void * host_ptr,
cl_int * errcode_ret)
{
- NOT_IMPLEMENTED;
- return NULL;
+ cl_mem mem = NULL;
+ cl_int err = CL_SUCCESS;
+ CHECK_CONTEXT (context);
+ cl_image_desc image_desc;
+
+ image_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
+ image_desc.image_width = image_width;
+ image_desc.image_height = image_height;
+ image_desc.image_depth = image_depth;
+ image_desc.image_row_pitch = image_row_pitch;
+ image_desc.image_slice_pitch = image_slice_pitch;
+
+ mem = cl_mem_new_image(context,
+ flags,
+ image_format,
+ &image_desc,
+ host_ptr,
+ errcode_ret);
+error:
+ if (errcode_ret)
+ *errcode_ret = err;
+ return mem;
}
cl_int
@@ -1056,13 +1093,6 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
goto error;
}
- /* Local size must be non-null */
- for (i = 0; i < work_dim; ++i)
- if (UNLIKELY(local_work_size[i] == 0)) {
- err = CL_INVALID_WORK_GROUP_SIZE;
- goto error;
- }
-
/* Check offset values. We add a non standard restriction. The offsets must
* also be evenly divided by the local sizes
*/
@@ -1072,16 +1102,16 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
err = CL_INVALID_GLOBAL_OFFSET;
goto error;
}
- if (UNLIKELY(global_work_offset[i] % local_work_size[i])) {
+ if (UNLIKELY(local_work_size != NULL && global_work_offset[i] % local_work_size[i])) {
err = CL_INVALID_GLOBAL_OFFSET;
goto error;
}
}
- /* Local sizes must divide global sizes */
+ /* Local sizes must be non-null and divide global sizes */
if (local_work_size != NULL)
for (i = 0; i < work_dim; ++i)
- if (UNLIKELY(global_work_size[i] % local_work_size[i])) {
+ if (UNLIKELY(local_work_size[i] == 0 || global_work_size[i] % local_work_size[i])) {
err = CL_INVALID_WORK_GROUP_SIZE;
goto error;
}
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 690e5cf..6950590 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -307,8 +307,9 @@ _cl_mem_new_image(cl_context ctx,
const cl_mem_object_type image_type,
size_t w,
size_t h,
+ size_t depth,
size_t pitch,
- int depth,
+ size_t slice_pitch,
void *data,
cl_int *errcode_ret)
{
@@ -356,6 +357,27 @@ _cl_mem_new_image(cl_context ctx,
/* Pick up tiling mode (we do only linear on SNB) */
if (cl_driver_get_ver(ctx->drv) != 6)
tiling = CL_TILE_Y;
+ depth = 1;
+ }
+
+ if (image_type == CL_MEM_OBJECT_IMAGE3D) {
+ size_t min_pitch = bpp * w;
+ if (data && pitch == 0)
+ pitch = min_pitch;
+ size_t min_slice_pitch = min_pitch * h;
+ if (data && slice_pitch == 0)
+ slice_pitch = min_slice_pitch;
+ if (UNLIKELY(w > ctx->device->image3d_max_width)) DO_IMAGE_ERROR;
+ if (UNLIKELY(h > ctx->device->image3d_max_height)) DO_IMAGE_ERROR;
+ if (UNLIKELY(depth > ctx->device->image3d_max_depth)) DO_IMAGE_ERROR;
+ if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
+ if (UNLIKELY(data && min_slice_pitch > slice_pitch)) DO_IMAGE_ERROR;
+ if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
+ if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR;
+
+ /* Pick up tiling mode (we do only linear on SNB) */
+ if (cl_driver_get_ver(ctx->drv) != 6)
+ tiling = CL_TILE_Y;
}
#undef DO_IMAGE_ERROR
@@ -371,7 +393,7 @@ _cl_mem_new_image(cl_context ctx,
aligned_h = ALIGN(h, tiley_h);
}
- sz = aligned_pitch * aligned_h;
+ sz = aligned_pitch * aligned_h * depth;
mem = cl_mem_allocate(ctx, flags, sz, tiling != CL_NO_TILE, &err);
if (mem == NULL || err != CL_SUCCESS)
goto error;
@@ -417,11 +439,11 @@ cl_mem_new_image(cl_context context,
switch (image_desc->image_type) {
case CL_MEM_OBJECT_IMAGE1D:
case CL_MEM_OBJECT_IMAGE2D:
+ case CL_MEM_OBJECT_IMAGE3D:
return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
- image_desc->image_width, image_desc->image_height,
- image_desc->image_row_pitch, image_desc->image_depth,
+ image_desc->image_width, image_desc->image_height, image_desc->image_depth,
+ image_desc->image_row_pitch, image_desc->image_slice_pitch,
host_ptr, errcode_ret);
- case CL_MEM_OBJECT_IMAGE3D:
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
case CL_MEM_OBJECT_IMAGE1D_BUFFER:
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 1275faa..20d5456 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -23,10 +23,13 @@ set (utests_sources
compiler_convert_uchar_sat.cpp
compiler_copy_buffer.cpp
compiler_copy_image.cpp
+ compiler_copy_image_3d.cpp
compiler_copy_buffer_row.cpp
compiler_fabs.cpp
compiler_fill_image.cpp
compiler_fill_image0.cpp
+ compiler_fill_image_3d.cpp
+ compiler_fill_image_3d_2.cpp
compiler_function_argument0.cpp
compiler_function_argument1.cpp
compiler_function_argument.cpp
diff --git a/utests/compiler_copy_image_3d.cpp b/utests/compiler_copy_image_3d.cpp
new file mode 100644
index 0000000..9816fe4
--- /dev/null
+++ b/utests/compiler_copy_image_3d.cpp
@@ -0,0 +1,55 @@
+#include "utest_helper.hpp"
+
+static void compiler_copy_image_3d(void)
+{
+ const size_t w = 512;
+ const size_t h = 512;
+ const size_t depth = 1;
+ cl_image_format format;
+ cl_image_desc desc;
+ cl_sampler sampler;
+
+ // Setup kernel and images
+ OCL_CREATE_KERNEL("test_copy_image_3d");
+ buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * w * h * depth);
+ for (uint32_t k = 0; k < depth; k++)
+ for (uint32_t j = 0; j < h; j++)
+ for (uint32_t i = 0; i < w; i++)
+ ((uint32_t*)buf_data[0])[k*w*h + j*w + i] = k*w*h + j*w + i;
+
+ format.image_channel_order = CL_RGBA;
+ format.image_channel_data_type = CL_UNSIGNED_INT8;
+ desc.image_type = CL_MEM_OBJECT_IMAGE3D;
+ desc.image_width = w;
+ desc.image_height = h;
+ desc.image_depth = depth;
+ desc.image_row_pitch = 0;
+ desc.image_slice_pitch = 0;
+ OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, buf_data[0]);
+ OCL_CREATE_IMAGE(buf[1], 0, &format, &desc, NULL);
+ OCL_CREATE_SAMPLER(sampler, CL_ADDRESS_REPEAT, CL_FILTER_NEAREST);
+ free(buf_data[0]);
+ buf_data[0] = NULL;
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(2, sizeof(sampler), &sampler);
+ globals[0] = w;
+ globals[1] = h;
+ locals[0] = 16;
+ locals[1] = 16;
+ OCL_NDRANGE(2);
+
+ // Check result
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t k = 0; k < depth; k++)
+ for (uint32_t j = 0; j < h; ++j)
+ for (uint32_t i = 0; i < w; i++)
+ OCL_ASSERT(((uint32_t*)buf_data[0])[k*w*h + j*w + i] == ((uint32_t*)buf_data[1])[k*w*h + j*w + i]);
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_copy_image_3d);
diff --git a/utests/compiler_fill_image_3d.cpp b/utests/compiler_fill_image_3d.cpp
new file mode 100644
index 0000000..5d98b0b
--- /dev/null
+++ b/utests/compiler_fill_image_3d.cpp
@@ -0,0 +1,44 @@
+#include "utest_helper.hpp"
+
+static void compiler_fill_image_3d(void)
+{
+ const size_t w = 512;
+ const size_t h = 512;
+ const size_t depth = 1;
+ uint32_t color = 0x12345678;
+ cl_image_format format;
+ cl_image_desc desc;
+
+ format.image_channel_order = CL_RGBA;
+ format.image_channel_data_type = CL_UNSIGNED_INT8;
+ desc.image_type = CL_MEM_OBJECT_IMAGE3D;
+ desc.image_width = w;
+ desc.image_height = h;
+ desc.image_depth = depth;
+ desc.image_row_pitch = 0;
+ desc.image_slice_pitch = 0;
+
+ // Setup kernel and images
+ OCL_CREATE_KERNEL("test_fill_image_3d");
+
+ OCL_CREATE_IMAGE(buf[0], 0, &format, &desc, NULL);
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(color), &color);
+ globals[0] = w;
+ globals[1] = h;
+ locals[0] = 16;
+ locals[1] = 16;
+ OCL_NDRANGE(2);
+
+ // Check result
+ OCL_MAP_BUFFER(0);
+ for (uint32_t k = 0; k < depth; k++)
+ for (uint32_t j = 0; j < h; ++j)
+ for (uint32_t i = 0; i < w; i++)
+ OCL_ASSERT(((uint32_t*)buf_data[0])[k*w*h + j*w + i] == 0x78563412);
+ OCL_UNMAP_BUFFER(0);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_fill_image_3d);
diff --git a/utests/compiler_fill_image_3d_2.cpp b/utests/compiler_fill_image_3d_2.cpp
new file mode 100644
index 0000000..3b4edb9
--- /dev/null
+++ b/utests/compiler_fill_image_3d_2.cpp
@@ -0,0 +1,42 @@
+#include "utest_helper.hpp"
+
+static void compiler_fill_image_3d_2(void)
+{
+ const size_t w = 512;
+ const size_t h = 512;
+ const size_t depth = 1;
+ cl_image_format format;
+ cl_image_desc desc;
+
+ format.image_channel_order = CL_RGBA;
+ format.image_channel_data_type = CL_UNSIGNED_INT8;
+ desc.image_type = CL_MEM_OBJECT_IMAGE3D;
+ desc.image_width = w;
+ desc.image_height = h;
+ desc.image_depth = depth;
+ desc.image_row_pitch = 0;
+ desc.image_slice_pitch = 0;
+
+ // Setup kernel and images
+ OCL_CREATE_KERNEL("test_fill_image_3d_2");
+
+ OCL_CREATE_IMAGE(buf[0], 0, &format, &desc, NULL);
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ globals[0] = w;
+ globals[1] = h;
+ locals[0] = 16;
+ locals[1] = 16;
+ OCL_NDRANGE(2);
+
+ // Check result
+ OCL_MAP_BUFFER(0);
+ for (uint32_t k = 0; k < depth; k++)
+ for (uint32_t j = 0; j < h; ++j)
+ for (uint32_t i = 0; i < w; i++)
+ OCL_ASSERT(((uint32_t*)buf_data[0])[k*w*h + j*w + i] == 0x78563412);
+ OCL_UNMAP_BUFFER(0);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_fill_image_3d_2);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/beignet.git
More information about the Pkg-opencl-devel
mailing list