[Pkg-opencl-devel] [beignet] 16/66: Imported Upstream version 0.1+git20130422+003fac5
Andreas Beckmann
anbe at moszumanska.debian.org
Fri Oct 31 07:27:03 UTC 2014
This is an automated email from the git hooks/post-receive script.
anbe pushed a commit to branch master
in repository beignet.
commit bbfdc9a35c42bb2cf517139ca440930b3c195971
Author: Simon Richter <sjr at debian.org>
Date: Mon Apr 22 15:10:54 2013 +0200
Imported Upstream version 0.1+git20130422+003fac5
---
backend/src/backend/context.cpp | 211 +++++++++++++--------
backend/src/backend/context.hpp | 2 +
backend/src/backend/gen_context.cpp | 28 ++-
backend/src/backend/gen_context.hpp | 1 +
.../src/backend/gen_insn_gen7_schedule_info.hxx | 1 +
backend/src/backend/gen_insn_selection.cpp | 34 +++-
backend/src/backend/gen_insn_selection.hxx | 1 +
backend/src/backend/gen_program.cpp | 5 +-
backend/src/backend/gen_register.hpp | 10 +
backend/src/backend/program.cpp | 13 +-
backend/src/backend/program.h | 7 +-
backend/src/backend/program.hpp | 15 +-
include/CL/cl_intel.h | 54 ++++--
kernels/compiler_function_constant.cl | 6 +
kernels/compiler_function_constant0.cl | 6 +
src/cl_api.c | 73 +++++--
src/cl_command_queue.c | 22 ++-
src/cl_command_queue.h | 2 +
src/cl_command_queue_gen7.c | 7 +-
src/cl_kernel.c | 16 +-
src/cl_mem.c | 16 ++
src/cl_mem.h | 7 +
utests/CMakeLists.txt | 3 +
utests/compiler_copy_image.cpp | 3 +-
utests/compiler_fill_image.cpp | 2 +-
utests/compiler_fill_image0.cpp | 2 +-
utests/compiler_function_constant.cpp | 34 ++++
utests/compiler_function_constant0.cpp | 42 ++++
utests/compiler_function_constant1.cpp | 47 +++++
utests/compiler_movforphi_undef.cpp | 15 +-
utests/runtime_flat_address_space.cpp | 8 +-
utests/utest_helper.cpp | 14 +-
utests/utest_helper.hpp | 4 +-
33 files changed, 560 insertions(+), 151 deletions(-)
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index 180e8bb..91d8d8c 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
@@ -53,7 +53,7 @@ namespace gbe
* the hardware. Note that we always use the left most block when
* allocating, so it makes sense for constant pushing
*/
- int16_t allocate(int16_t size, int16_t alignment);
+ int16_t allocate(int16_t size, int16_t alignment, bool bFwd=false);
/*! Free the given register file piece */
void deallocate(int16_t offset);
@@ -75,8 +75,9 @@ namespace gbe
* If the colascing was done, the left block is deleted
*/
void coalesce(Block *left, Block *right);
- /*! Head of the free list */
+ /*! Head and tail of the free list */
Block *head;
+ Block *tail;
/*! Handle free list element allocation */
DECL_POOL(Block, blockPool);
/*! Track allocated memory blocks <offset, size> */
@@ -89,10 +90,10 @@ namespace gbe
// r0 is always set by the HW and used at the end by EOT
const int16_t offset = GEN_REG_SIZE;
const int16_t size = RegisterFileSize - offset;
- head = this->newBlock(offset, size);
+ tail = head = this->newBlock(offset, size);
}
- RegisterFilePartitioner::~RegisterFilePartitioner(void) {
+ RegisterFilePartitioner::~RegisterFilePartitioner(void) {
while (this->head) {
Block *next = this->head->next;
this->deleteBlock(this->head);
@@ -100,80 +101,104 @@ namespace gbe
}
}
- int16_t RegisterFilePartitioner::allocate(int16_t size, int16_t alignment)
+ int16_t RegisterFilePartitioner::allocate(int16_t size, int16_t alignment, bool bFwd)
{
// Make it simple and just use the first block we find
- Block *list = head;
+ Block *list = bFwd ? head : tail;
while (list) {
- const int16_t aligned = ALIGN(list->offset, alignment);
- const int16_t spaceOnLeft = aligned - list->offset;
- const int16_t spaceOnRight = list->size - size - spaceOnLeft;
+ int16_t aligned;
+ int16_t spaceOnLeft;
+ int16_t spaceOnRight;
+ if(bFwd) {
+ aligned = ALIGN(list->offset, alignment);
+ spaceOnLeft = aligned - list->offset;
+ spaceOnRight = list->size - size - spaceOnLeft;
// Not enough space in this block
- if (spaceOnRight < 0) {
- list = list->next;
- continue;
+ if (spaceOnRight < 0) {
+ list = list->next;
+ continue;
+ }
+ } else {
+ aligned = ALIGN(list->offset+list->size-size-(alignment-1), alignment); //alloc from block's tail
+ spaceOnLeft = aligned - list->offset;
+ spaceOnRight = list->size - size - spaceOnLeft;
+
+ // Not enough space in this block
+ if (spaceOnLeft < 0) {
+ list = list->prev;
+ continue;
+ }
}
+
// Cool we can use this block
- else {
- Block *left = list->prev;
- Block *right = list->next;
-
- // If we left a hole on the left, create a new block
- if (spaceOnLeft) {
- Block *newBlock = this->newBlock(list->offset, spaceOnLeft);
- if (left) {
- left->next = newBlock;
- newBlock->prev = left;
- }
- if (right) {
- newBlock->next = right;
- right->prev = newBlock;
- }
- left = newBlock;
+ Block *left = list->prev;
+ Block *right = list->next;
+
+ // If we left a hole on the left, create a new block
+ if (spaceOnLeft) {
+ Block *newBlock = this->newBlock(list->offset, spaceOnLeft);
+ if (left) {
+ left->next = newBlock;
+ newBlock->prev = left;
}
-
- // If we left a hole on the right, create a new block as well
- if (spaceOnRight) {
- Block *newBlock = this->newBlock(aligned + size, spaceOnRight);
- if (left) {
- left->next = newBlock;
- newBlock->prev = left;
- }
- if (right) {
- right->prev = newBlock;
- newBlock->next = right;
- }
- right = newBlock;
+ if (right) {
+ newBlock->next = right;
+ right->prev = newBlock;
}
+ left = newBlock;
+ }
- // Chain both successors and predecessors when the entire block was
- // allocated
- if (spaceOnLeft == 0 && spaceOnRight == 0) {
- if (left) left->next = right;
- if (right) right->prev = left;
+ // If we left a hole on the right, create a new block as well
+ if (spaceOnRight) {
+ Block *newBlock = this->newBlock(aligned + size, spaceOnRight);
+ if (left) {
+ left->next = newBlock;
+ newBlock->prev = left;
}
-
- // Update the head of the free blocks
- if (list == head) {
- if (left)
- head = left;
- else if (right)
- head = right;
- else
- head = NULL;
+ if (right) {
+ right->prev = newBlock;
+ newBlock->next = right;
}
+ right = newBlock;
+ }
- // Free the block and check the consistency
- this->deleteBlock(list);
- if (head && head->next) GBE_ASSERT(head->next->prev == head);
+ // Chain both successors and predecessors when the entire block was
+ // allocated
+ if (spaceOnLeft == 0 && spaceOnRight == 0) {
+ if (left) left->next = right;
+ if (right) right->prev = left;
+ }
- // Track the allocation to retrieve the size later
- allocatedBlocks.insert(std::make_pair(aligned, size));
+ // Update the head of the free blocks
+ if (list == head) {
+ if (left)
+ head = left;
+ else if (right)
+ head = right;
+ else
+ head = NULL;
+ }
- // We have a valid offset now
- return aligned;
+ // Update the tail of the free blocks
+ if (list == tail) {
+ if (right)
+ tail = right;
+ else if (left)
+ tail = left;
+ else
+ tail = NULL;
}
+ // Free the block and check the consistency
+ this->deleteBlock(list);
+ if (head && head->next) GBE_ASSERT(head->next->prev == head);
+ if (tail && tail->prev) GBE_ASSERT(tail->prev->next == tail);
+
+ // Track the allocation to retrieve the size later
+ allocatedBlocks.insert(std::make_pair(aligned, size));
+
+ // We have a valid offset now
+ return aligned;
}
return 0;
}
@@ -186,34 +211,35 @@ namespace gbe
const int16_t size = it->second;
// Find the two blocks where to insert the new block
- Block *list = head, *prev = NULL;
+ Block *list = tail, *next = NULL;
while (list != NULL) {
- if (list->offset > offset)
+ if (list->offset < offset)
break;
- prev = list;
- list = list->next;
+ next = list;
+ list = list->prev;
}
// Create the block and insert it
Block *newBlock = this->newBlock(offset, size);
- if (prev) {
- GBE_ASSERT(prev->offset + prev->size <= offset);
- prev->next = newBlock;
- newBlock->prev = prev;
+ if (list) {
+ GBE_ASSERT(list->offset + list->size <= offset);
+ list->next = newBlock;
+ newBlock->prev = list;
} else
- this->head = newBlock; // prev is NULL means newBlock should be the head.
+ this->head = newBlock; // list is NULL means newBlock should be the head.
- if (list) {
- GBE_ASSERT(offset + size <= list->offset);
- list->prev = newBlock;
- newBlock->next = list;
- }
+ if (next) {
+ GBE_ASSERT(offset + size <= next->offset);
+ next->prev = newBlock;
+ newBlock->next = next;
+ } else
+ this->tail = newBlock; // next is NULL means newBlock should be the tail.
- if (prev != NULL || list != NULL)
+ if (list != NULL || next != NULL)
{
// Coalesce the blocks if possible
- this->coalesce(prev, newBlock);
- this->coalesce(newBlock, list);
+ this->coalesce(list, newBlock);
+ this->coalesce(newBlock, next);
}
// Do not track this allocation anymore
@@ -273,6 +299,8 @@ namespace gbe
GBE_DELETE(this->kernel);
this->kernel = NULL;
}
+ if(this->kernel != NULL)
+ this->kernel->ctx = this;
return this->kernel;
}
@@ -282,6 +310,27 @@ namespace gbe
void Context::deallocate(int16_t offset) { partitioner->deallocate(offset); }
+ int32_t Context::allocConstBuf(uint32_t argID) {
+ GBE_ASSERT(kernel->args[argID].type == GBE_ARG_CONSTANT_PTR);
+
+ //free previous
+ int32_t offset = kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, argID+GBE_CONSTANT_BUFFER);
+ if(offset >= 0)
+ deallocate(offset+GEN_REG_SIZE);
+
+ if(kernel->args[argID].bufSize > 0) {
+ //use 32 alignment here as GEN_REG_SIZE, need dynamic by type?
+ newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_CONSTANT_BUFFER+argID, kernel->args[argID].bufSize, 32);
+ }
+
+ std::sort(kernel->patches.begin(), kernel->patches.end());
+ offset = kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, argID+GBE_CONSTANT_BUFFER);
+ GBE_ASSERT(offset>=0);
+
+ kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE);
+ return offset + GEN_REG_SIZE;
+ }
+
void Context::buildStack(void) {
const auto &stackUse = dag->getUse(ir::ocl::stackptr);
if (stackUse.size() == 0) // no stack is used if stackptr is unused
@@ -297,7 +346,7 @@ namespace gbe
uint32_t alignment)
{
alignment = alignment == 0 ? size : alignment;
- const uint32_t offset = partitioner->allocate(size, alignment);
+ const uint32_t offset = partitioner->allocate(size, alignment, 1);
GBE_ASSERT(offset >= GEN_REG_SIZE);
kernel->patches.push_back(PatchInfo(value, subValue, offset - GEN_REG_SIZE));
kernel->curbeSize = std::max(kernel->curbeSize, offset + size - GEN_REG_SIZE);
diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp
index 55a63a7..245ad01 100644
--- a/backend/src/backend/context.hpp
+++ b/backend/src/backend/context.hpp
@@ -86,6 +86,8 @@ namespace gbe
int16_t allocate(int16_t size, int16_t alignment);
/*! Deallocate previously allocated memory */
void deallocate(int16_t offset);
+ /* allocate curbe for constant ptr argument */
+ int32_t allocConstBuf(uint32_t argID);
protected:
/*! Build the instruction stream. Return false if failed */
virtual bool emitCode(void) = 0;
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index b3d385b..1f867b8 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -144,7 +144,7 @@ namespace gbe
}
}
- void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
+ void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
const GenRegister dst = ra->genReg(insn.dst(0));
const GenRegister src0 = ra->genReg(insn.src(0));
const GenRegister src1 = ra->genReg(insn.src(1));
@@ -212,6 +212,32 @@ namespace gbe
}
}
+ void GenContext::emitCBMoveInstruction(const SelectionInstruction &insn) {
+ const GenRegister src = GenRegister::unpacked_uw(ra->genReg(insn.src(0)).nr, 0);
+ const GenRegister dst = ra->genReg(insn.dst(0));
+ const GenRegister a0 = GenRegister::addr8(0);
+ uint32_t simdWidth = p->curr.execWidth;
+
+ p->push();
+ p->curr.execWidth = 8;
+ p->curr.quarterControl = GEN_COMPRESSION_Q1;
+ p->MOV(a0, src);
+ p->MOV(dst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8));
+ p->pop();
+
+ if (simdWidth == 16) {
+ p->push();
+ p->curr.execWidth = 8;
+ p->curr.quarterControl = GEN_COMPRESSION_Q2;
+
+ const GenRegister nextDst = GenRegister::Qn(dst, 1);
+ const GenRegister nextSrc = GenRegister::Qn(src, 1);
+ p->MOV(a0, nextSrc);
+ p->MOV(nextDst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8));
+ p->pop();
+ }
+ }
+
void GenContext::emitJumpInstruction(const SelectionInstruction &insn) {
const ir::LabelIndex label(insn.index);
const GenRegister src = ra->genReg(insn.src(0));
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 6af174f..33258f8 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -80,6 +80,7 @@ namespace gbe
void emitTernaryInstruction(const SelectionInstruction &insn);
void emitCompareInstruction(const SelectionInstruction &insn);
void emitJumpInstruction(const SelectionInstruction &insn);
+ void emitCBMoveInstruction(const SelectionInstruction &insn);
void emitEotInstruction(const SelectionInstruction &insn);
void emitNoOpInstruction(const SelectionInstruction &insn);
void emitWaitInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index 969ec82..ce8769f 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -5,6 +5,7 @@ DECL_GEN7_SCHEDULE(Binary, 20, 4, 2)
DECL_GEN7_SCHEDULE(Ternary, 20, 4, 2)
DECL_GEN7_SCHEDULE(Compare, 20, 4, 2)
DECL_GEN7_SCHEDULE(Jump, 14, 1, 1)
+DECL_GEN7_SCHEDULE(CBMove, 20, 2, 2)
DECL_GEN7_SCHEDULE(Eot, 20, 1, 1)
DECL_GEN7_SCHEDULE(NoOp, 20, 2, 2)
DECL_GEN7_SCHEDULE(Wait, 20, 2, 2)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index e0e8920..ee8a0ab 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
@@ -25,7 +25,7 @@
/* This is the instruction selection code. First of all, this is a bunch of c++
* crap. Sorry if this is not that readable. Anyway, the goal here is to take
* GenIR code (i.e. the very regular, very RISC IR) and to produce GenISA with
- * virtual registers (i.e. regular GenIR registers).
+ * virtual registers (i.e. regular GenIR registers).
*
* Overall idea:
* =============
@@ -72,7 +72,7 @@
* *same* flag register for the predicates (used for masking) and the
* conditional modifier (used as a destination for CMP). This leads to extra
* complications with compare instructions and select instructions. Basically,
- * we need to insert extra MOVs.
+ * we need to insert extra MOVs.
*
* Also, there is some extra kludge to handle the predicates for JMPI.
*
@@ -439,6 +439,8 @@ namespace gbe
void CMP(uint32_t conditional, Reg src0, Reg src1);
/*! Select instruction with embedded comparison */
void SEL_CMP(uint32_t conditional, Reg dst, Reg src0, Reg src1);
+ /* Constant buffer move instruction */
+ void CB_MOVE(Reg dst, Reg src);
/*! EOT is used to finish GPGPU threads */
void EOT(void);
/*! No-op */
@@ -481,7 +483,7 @@ namespace gbe
static void markAllChildren(SelectionDAG &dag) {
// Do not merge anything, so all sources become roots
for (uint32_t childID = 0; childID < dag.childNum; ++childID)
- if (dag.child[childID])
+ if (dag.child[childID])
dag.child[childID]->isRoot = 1;
}
@@ -698,6 +700,11 @@ namespace gbe
insn->src(1) = src1;
insn->extra.function = conditional;
}
+ void Selection::Opaque::CB_MOVE(Reg dst, Reg src) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_CB_MOVE, 1, 1);
+ insn->dst(0) = dst;
+ insn->src(0) = src;
+ }
void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); }
void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); }
@@ -1057,7 +1064,7 @@ namespace gbe
// Implementation of all patterns
///////////////////////////////////////////////////////////////////////////
- GenRegister getRegisterFromImmediate(ir::Immediate imm)
+ GenRegister getRegisterFromImmediate(ir::Immediate imm)
{
using namespace ir;
switch (imm.type) {
@@ -1654,15 +1661,30 @@ namespace gbe
sel.MOV(GenRegister::retype(value, GEN_TYPE_UB), GenRegister::unpacked_ub(dst));
}
+ void emitCBMove(Selection::Opaque &sel,
+ const ir::LoadInstruction &insn,
+ GenRegister address) const
+ {
+ using namespace ir;
+ GBE_ASSERT(insn.getValueNum() == 1); //todo: handle vec later
+
+ const GenRegister dst = sel.selReg(insn.getValue(0), insn.getValueType());
+ const GenRegister src = address;
+ sel.CB_MOVE(dst, src);
+ }
+
INLINE bool emitOne(Selection::Opaque &sel, const ir::LoadInstruction &insn) const {
using namespace ir;
const GenRegister address = sel.selReg(insn.getAddress());
const AddressSpace space = insn.getAddressSpace();
GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL ||
+ insn.getAddressSpace() == MEM_CONSTANT ||
insn.getAddressSpace() == MEM_PRIVATE ||
insn.getAddressSpace() == MEM_LOCAL);
GBE_ASSERT(sel.ctx.isScalarReg(insn.getValue(0)) == false);
- if (insn.isAligned() == true)
+ if (insn.getAddressSpace() == MEM_CONSTANT)
+ this->emitCBMove(sel, insn, address);
+ else if (insn.isAligned() == true)
this->emitUntypedRead(sel, insn, address, space == MEM_LOCAL ? 0xfe : 0x00);
else {
const GenRegister value = sel.selReg(insn.getValue(0));
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 691100b..f89ad4c 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -24,6 +24,7 @@ DECL_SELECTION_IR(SEL_CMP, CompareInstruction)
DECL_SELECTION_IR(MAD, TernaryInstruction)
DECL_SELECTION_IR(JMPI, JumpInstruction)
DECL_SELECTION_IR(EOT, EotInstruction)
+DECL_SELECTION_IR(CB_MOVE, CBMoveInstruction)
DECL_SELECTION_IR(NOP, NoOpInstruction)
DECL_SELECTION_IR(WAIT, WaitInstruction)
DECL_SELECTION_IR(MATH, MathInstruction)
diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp
index 8a7efdb..3d7bedd 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -76,9 +76,10 @@ namespace gbe {
unit.getFunction(name)->setSimdWidth(simdWidth);
Context *ctx = GBE_NEW(GenContext, unit, name, limitRegisterPressure);
kernel = ctx->compileKernel();
- GBE_DELETE(ctx);
- if (kernel != NULL)
+ if (kernel != NULL) {
break;
+ }
+ GBE_DELETE(ctx);
}
// XXX spill must be implemented
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 92122a6..d772b0d 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -725,6 +725,16 @@ namespace gbe
return ub16(GEN_GENERAL_REGISTER_FILE, nr, subnr);
}
+ static INLINE GenRegister unpacked_uw(uint32_t nr, uint32_t subnr) {
+ return GenRegister(GEN_GENERAL_REGISTER_FILE,
+ nr,
+ subnr,
+ GEN_TYPE_UW,
+ GEN_VERTICAL_STRIDE_16,
+ GEN_WIDTH_8,
+ GEN_HORIZONTAL_STRIDE_2);
+ }
+
static INLINE GenRegister mask(uint32_t subnr) {
return uw1(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_MASK, subnr);
}
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index d33c533..0882e5a 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
@@ -49,9 +49,10 @@
namespace gbe {
Kernel::Kernel(const std::string &name) :
- name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false)
+ name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false), ctx(NULL)
{}
Kernel::~Kernel(void) {
+ if(ctx) GBE_DELETE(ctx);
GBE_SAFE_DELETE_ARRAY(args);
}
int32_t Kernel::getCurbeOffset(gbe_curbe_type type, uint32_t subType) const {
@@ -229,6 +230,12 @@ namespace gbe {
return kernel->getUseSLM() ? 1 : 0;
}
+ static int32_t kernelSetConstBufSize(gbe_kernel genKernel, uint32_t argID, size_t sz) {
+ if (genKernel == NULL) return -1;
+ gbe::Kernel *kernel = (gbe::Kernel*) genKernel;
+ return kernel->setConstBufSize(argID, sz);
+ }
+
static uint32_t kernelGetRequiredWorkGroupSize(gbe_kernel kernel, uint32_t dim) {
return 0u;
}
@@ -251,6 +258,7 @@ GBE_EXPORT_SYMBOL gbe_kernel_get_simd_width_cb *gbe_kernel_get_simd_width = NULL
GBE_EXPORT_SYMBOL gbe_kernel_get_curbe_offset_cb *gbe_kernel_get_curbe_offset = NULL;
GBE_EXPORT_SYMBOL gbe_kernel_get_curbe_size_cb *gbe_kernel_get_curbe_size = NULL;
GBE_EXPORT_SYMBOL gbe_kernel_get_stack_size_cb *gbe_kernel_get_stack_size = NULL;
+GBE_EXPORT_SYMBOL gbe_kernel_set_const_buffer_size_cb *gbe_kernel_set_const_buffer_size = NULL;
GBE_EXPORT_SYMBOL gbe_kernel_get_required_work_group_size_cb *gbe_kernel_get_required_work_group_size = NULL;
GBE_EXPORT_SYMBOL gbe_kernel_use_slm_cb *gbe_kernel_use_slm = NULL;
@@ -275,6 +283,7 @@ namespace gbe
gbe_kernel_get_curbe_offset = gbe::kernelGetCurbeOffset;
gbe_kernel_get_curbe_size = gbe::kernelGetCurbeSize;
gbe_kernel_get_stack_size = gbe::kernelGetStackSize;
+ gbe_kernel_set_const_buffer_size = gbe::kernelSetConstBufSize;
gbe_kernel_get_required_work_group_size = gbe::kernelGetRequiredWorkGroupSize;
gbe_kernel_use_slm = gbe::kernelUseSLM;
genSetupCallBacks();
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index b90c1df..4273a77 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -81,7 +81,8 @@ enum gbe_curbe_type {
/*! Extra arguments use the negative range of sub-values */
enum gbe_extra_argument {
- GBE_STACK_BUFFER = 0 /* Give stack location in curbe */
+ GBE_STACK_BUFFER = 0, /* Give stack location in curbe */
+ GBE_CONSTANT_BUFFER = 1 /* constant buffer argument location in curbe */
};
/*! Create a new program from the given source code (zero terminated string) */
@@ -159,6 +160,10 @@ extern gbe_kernel_get_stack_size_cb *gbe_kernel_get_stack_size;
typedef int32_t (gbe_kernel_get_curbe_offset_cb)(gbe_kernel, enum gbe_curbe_type type, uint32_t sub_type);
extern gbe_kernel_get_curbe_offset_cb *gbe_kernel_get_curbe_offset;
+/*! Set the constant pointer arg size and return the cb offset in curbe */
+typedef int32_t (gbe_kernel_set_const_buffer_size_cb)(gbe_kernel, uint32_t argID, size_t sz);
+extern gbe_kernel_set_const_buffer_size_cb *gbe_kernel_set_const_buffer_size;
+
/*! Indicates if a work group size is required. Return the required width or 0
* if none
*/
diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp
index e0f7dba..e7584d9 100644
--- a/backend/src/backend/program.hpp
+++ b/backend/src/backend/program.hpp
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
@@ -26,6 +26,7 @@
#define __GBE_PROGRAM_HPP__
#include "backend/program.h"
+#include "backend/context.hpp"
#include "sys/hash_map.hpp"
#include "sys/vector.hpp"
#include <string>
@@ -42,6 +43,7 @@ namespace gbe {
struct KernelArgument {
gbe_arg_type type; //!< Pointer, structure, image, regular value?
uint32_t size; //!< Size of the argument
+ uint32_t bufSize; //!< Contant buffer size
};
/*! Stores the offset where to patch where to patch */
@@ -94,6 +96,16 @@ namespace gbe {
INLINE uint32_t getSIMDWidth(void) const { return this->simdWidth; }
/*! Says if SLM is needed for it */
INLINE bool getUseSLM(void) const { return this->useSLM; }
+ /*! set constant buffer size and return the cb curbe offset */
+ int32_t setConstBufSize(uint32_t argID, size_t sz) {
+ if(argID >= argNum) return -1;
+ if(args[argID].type != GBE_ARG_CONSTANT_PTR) return -1;
+ if(args[argID].bufSize != sz) {
+ args[argID].bufSize = sz;
+ return ctx->allocConstBuf(argID);
+ }
+ return -1;
+ }
protected:
friend class Context; //!< Owns the kernels
const std::string name; //!< Kernel name
@@ -104,6 +116,7 @@ namespace gbe {
uint32_t simdWidth; //!< SIMD size for the kernel (lane number)
uint32_t stackSize; //!< Stack size (may be 0 if unused)
bool useSLM; //!< SLM requires a special HW config
+ Context *ctx; //!< Save context after compiler to alloc constant buffer curbe
GBE_CLASS(Kernel); //!< Use custom allocators
};
diff --git a/include/CL/cl_intel.h b/include/CL/cl_intel.h
index 680f948..135e340 100644
--- a/include/CL/cl_intel.h
+++ b/include/CL/cl_intel.h
@@ -30,33 +30,65 @@ extern "C" {
/* Track allocations and report current number of unfreed allocations */
extern CL_API_ENTRY cl_int CL_API_CALL
-clIntelReportUnfreed(void);
+clReportUnfreedIntel(void);
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clReportUnfreedIntel_fn)(void);
/* 1 to 1 mapping of drm_intel_bo_map */
extern CL_API_ENTRY void* CL_API_CALL
-clIntelMapBuffer(cl_mem, cl_int*);
+clMapBufferIntel(cl_mem, cl_int*);
+
+typedef CL_API_ENTRY void* (CL_API_CALL *clMapBufferIntel_fn)(cl_mem, cl_int*);
/* 1 to 1 mapping of drm_intel_bo_unmap */
extern CL_API_ENTRY cl_int CL_API_CALL
-clIntelUnmapBuffer(cl_mem);
+clUnmapBufferIntel(cl_mem);
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clUnmapBufferIntel_fn)(cl_mem);
+
+/* 1 to 1 mapping of drm_intel_gem_bo_map_gtt */
+extern CL_API_ENTRY void* CL_API_CALL
+clMapBufferGTTIntel(cl_mem, cl_int*);
+
+typedef CL_API_ENTRY void* (CL_API_CALL *clMapBufferGTTIntel_fn)(cl_mem, cl_int*);
+
+/* 1 to 1 mapping of drm_intel_gem_bo_unmap_gtt */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clUnmapBufferGTTIntel(cl_mem);
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clUnmapBufferGTTIntel_fn)(cl_mem);
/* Pin /Unpin the buffer in GPU memory (must be root) */
extern CL_API_ENTRY cl_int CL_API_CALL
-clIntelPinBuffer(cl_mem);
+clPinBufferIntel(cl_mem);
extern CL_API_ENTRY cl_int CL_API_CALL
-clIntelUnpinBuffer(cl_mem);
+clUnpinBufferIntel(cl_mem);
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clPinBufferIntel_fn)(cl_mem);
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clUnpinBufferIntel_fn)(cl_mem);
/* Get the generation of the Gen device (used to load the proper binary) */
extern CL_API_ENTRY cl_int CL_API_CALL
-clIntelGetGenVersion(cl_device_id device, cl_int *ver);
+clGetGenVersionIntel(cl_device_id device, cl_int *ver);
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGenVersionIntel_fn)(
+ cl_device_id device,
+ cl_int *ver);
/* Create a program from a LLVM source file */
extern CL_API_ENTRY cl_program CL_API_CALL
-clCreateProgramWithLLVM(cl_context /* context */,
- cl_uint /* num_devices */,
- const cl_device_id * /* device_list */,
- const char * /* file */,
- cl_int * /* errcode_ret */);
+clCreateProgramWithLLVMIntel(cl_context /* context */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* device_list */,
+ const char * /* file */,
+ cl_int * /* errcode_ret */);
+
+typedef CL_API_ENTRY cl_program (CL_API_CALL *clCreateProgramWithLLVMIntel_fn)(
+ cl_context /* context */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* device_list */,
+ const char * /* file */,
+ cl_int * /* errcode_ret */);
#ifdef __cplusplus
}
diff --git a/kernels/compiler_function_constant.cl b/kernels/compiler_function_constant.cl
new file mode 100644
index 0000000..ca7e874
--- /dev/null
+++ b/kernels/compiler_function_constant.cl
@@ -0,0 +1,6 @@
+__kernel void
+compiler_function_constant(__constant short *c, __global int *dst, int value)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = value + c[id%69];
+}
diff --git a/kernels/compiler_function_constant0.cl b/kernels/compiler_function_constant0.cl
new file mode 100644
index 0000000..f6efcef
--- /dev/null
+++ b/kernels/compiler_function_constant0.cl
@@ -0,0 +1,6 @@
+__kernel void
+compiler_function_constant0(__constant short *c0, __constant char *c1, __global int *dst, int value)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = value + c0[id%69] + c1[15];
+}
diff --git a/src/cl_api.c b/src/cl_api.c
index 2d84ace..03cc0e6 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -793,7 +793,7 @@ clEnqueueReadBuffer(cl_command_queue command_queue,
cl_int err = CL_SUCCESS;
assert(ptr != NULL);
void* temp_ptr = NULL;
- temp_ptr = clIntelMapBuffer(buffer, &err);
+ temp_ptr = clMapBufferIntel(buffer, &err);
assert(err == CL_SUCCESS);
memcpy(ptr, temp_ptr, cb);
return err;
@@ -833,11 +833,11 @@ clEnqueueWriteBuffer(cl_command_queue command_queue,
if (blocking_write != CL_TRUE)
NOT_IMPLEMENTED;
cl_int err;
- void *p = clIntelMapBuffer(buffer, &err);
+ void *p = clMapBufferIntel(buffer, &err);
if (err != CL_SUCCESS)
return err;
memcpy(p + offset, ptr, cb);
- err = clIntelUnmapBuffer(buffer);
+ err = clUnmapBufferIntel(buffer);
return err;
}
@@ -991,7 +991,7 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
NOT_IMPLEMENTED;
if (offset != 0)
NOT_IMPLEMENTED;
- p = clIntelMapBuffer(buffer, errcode_ret);
+ p = clMapBufferIntel(buffer, errcode_ret);
return p;
}
@@ -1021,7 +1021,7 @@ clEnqueueUnmapMemObject(cl_command_queue command_queue,
const cl_event * event_wait_list,
cl_event * event)
{
- return clIntelUnmapBuffer(memobj);
+ return clUnmapBufferIntel(memobj);
}
cl_int
@@ -1170,6 +1170,10 @@ clEnqueueBarrier(cl_command_queue command_queue)
return 0;
}
+#define EXTFUNC(x) \
+ if (strcmp(#x, func_name) == 0) \
+ return (void *)x;
+
void*
clGetExtensionFunctionAddress(const char *func_name)
{
@@ -1177,20 +1181,30 @@ clGetExtensionFunctionAddress(const char *func_name)
return NULL;
#ifdef HAS_OCLIcd
/* cl_khr_icd */
- if (strcmp("clIcdGetPlatformIDsKHR", func_name) == 0)
- return (void *)clIcdGetPlatformIDsKHR;
+ EXTFUNC(clIcdGetPlatformIDsKHR)
#endif
+ EXTFUNC(clCreateProgramWithLLVMIntel)
+ EXTFUNC(clGetGenVersionIntel)
+ EXTFUNC(clMapBufferIntel)
+ EXTFUNC(clUnmapBufferIntel)
+ EXTFUNC(clMapBufferGTTIntel)
+ EXTFUNC(clUnmapBufferGTTIntel)
+ EXTFUNC(clPinBufferIntel)
+ EXTFUNC(clUnpinBufferIntel)
+ EXTFUNC(clReportUnfreedIntel)
return NULL;
}
+#undef EXTFUNC
+
cl_int
-clIntelReportUnfreed(void)
+clReportUnfreedIntel(void)
{
return cl_report_unfreed();
}
void*
-clIntelMapBuffer(cl_mem mem, cl_int *errcode_ret)
+clMapBufferIntel(cl_mem mem, cl_int *errcode_ret)
{
void *ptr = NULL;
cl_int err = CL_SUCCESS;
@@ -1203,7 +1217,7 @@ error:
}
cl_int
-clIntelUnmapBuffer(cl_mem mem)
+clUnmapBufferIntel(cl_mem mem)
{
cl_int err = CL_SUCCESS;
CHECK_MEM (mem);
@@ -1212,8 +1226,31 @@ error:
return err;
}
+void*
+clMapBufferGTTIntel(cl_mem mem, cl_int *errcode_ret)
+{
+ void *ptr = NULL;
+ cl_int err = CL_SUCCESS;
+ CHECK_MEM (mem);
+ ptr = cl_mem_map_gtt(mem);
+error:
+ if (errcode_ret)
+ *errcode_ret = err;
+ return ptr;
+}
+
+cl_int
+clUnmapBufferGTTIntel(cl_mem mem)
+{
+ cl_int err = CL_SUCCESS;
+ CHECK_MEM (mem);
+ err = cl_mem_unmap_gtt(mem);
+error:
+ return err;
+}
+
cl_int
-clIntelPinBuffer(cl_mem mem)
+clPinBufferIntel(cl_mem mem)
{
cl_int err = CL_SUCCESS;
CHECK_MEM (mem);
@@ -1223,7 +1260,7 @@ error:
}
cl_int
-clIntelUnpinBuffer(cl_mem mem)
+clUnpinBufferIntel(cl_mem mem)
{
cl_int err = CL_SUCCESS;
CHECK_MEM (mem);
@@ -1233,17 +1270,17 @@ error:
}
cl_int
-clIntelGetGenVersion(cl_device_id device, cl_int *ver)
+clGetGenVersionIntel(cl_device_id device, cl_int *ver)
{
return cl_device_get_version(device, ver);
}
cl_program
-clCreateProgramWithLLVM(cl_context context,
- cl_uint num_devices,
- const cl_device_id * devices,
- const char * filename,
- cl_int * errcode_ret)
+clCreateProgramWithLLVMIntel(cl_context context,
+ cl_uint num_devices,
+ const cl_device_id * devices,
+ const char * filename,
+ cl_int * errcode_ret)
{
return cl_program_create_from_llvm(context,
num_devices,
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index a22884f..7d604c3 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
@@ -108,7 +108,6 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
uint32_t offset; // location of the address in the curbe
arg_type = gbe_kernel_get_arg_type(k->opaque, i);
if (arg_type != GBE_ARG_GLOBAL_PTR &&
- arg_type != GBE_ARG_CONSTANT_PTR &&
arg_type != GBE_ARG_IMAGE &&
arg_type != GBE_ARG_SAMPLER)
continue;
@@ -129,6 +128,25 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
return CL_SUCCESS;
}
+LOCAL cl_int cl_command_queue_upload_constant_buffer(cl_kernel k,
+ char * dst)
+{
+ int i;
+ for(i = 0; i < k->arg_n; i++) {
+ enum gbe_arg_type arg_type = gbe_kernel_get_arg_type(k->opaque, i);
+
+ if(arg_type == GBE_ARG_CONSTANT_PTR) {
+ uint32_t offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_EXTRA_ARGUMENT, i+GBE_CONSTANT_BUFFER);
+ cl_mem mem = k->args[i].mem;
+ cl_buffer_map(mem->bo, 1);
+ void * addr = cl_buffer_get_virtual(mem->bo);
+ memcpy(dst + offset, addr, mem->size);
+ cl_buffer_unmap(mem->bo);
+ }
+ }
+ return CL_SUCCESS;
+}
+
#if USE_FULSIM
extern void drm_intel_bufmgr_gem_stop_aubfile(cl_buffer_mgr);
extern void drm_intel_bufmgr_gem_set_aubfile(cl_buffer_mgr, FILE*);
diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h
index 6387ae1..dcfc8c4 100644
--- a/src/cl_command_queue.h
+++ b/src/cl_command_queue.h
@@ -70,5 +70,7 @@ extern cl_int cl_command_queue_finish(cl_command_queue);
/* Bind all the surfaces in the GPGPU state */
extern cl_int cl_command_queue_bind_surface(cl_command_queue, cl_kernel);
+/*update constant buffer to final curbe */
+extern cl_int cl_command_queue_upload_constant_buffer(cl_kernel k, char * dst);
#endif /* __CL_COMMAND_QUEUE_H__ */
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 3a590bc..9402549 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -186,7 +186,8 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
char *final_curbe = NULL; /* Includes them and one sub-buffer per group */
cl_gpgpu_kernel kernel;
const uint32_t simd_sz = cl_kernel_get_simd_width(ker);
- size_t i, batch_sz = 0u, local_sz = 0u, cst_sz = ker->curbe_sz;
+ size_t i, batch_sz = 0u, local_sz = 0u;
+ size_t cst_sz = ker->curbe_sz= gbe_kernel_get_curbe_size(ker->opaque);
size_t thread_n = 0u;
cl_int err = CL_SUCCESS;
@@ -224,8 +225,10 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
if (ker->curbe) {
assert(cst_sz > 0);
TRY_ALLOC (final_curbe, (char*) alloca(thread_n * cst_sz));
- for (i = 0; i < thread_n; ++i)
+ for (i = 0; i < thread_n; ++i) {
memcpy(final_curbe + cst_sz * i, ker->curbe, cst_sz);
+ cl_command_queue_upload_constant_buffer(ker, final_curbe + cst_sz * i);
+ }
TRY (cl_set_varying_payload, ker, final_curbe, local_wk_sz, simd_sz, cst_sz, thread_n);
cl_gpgpu_upload_constants(gpgpu, final_curbe, thread_n*cst_sz);
}
diff --git a/src/cl_kernel.c b/src/cl_kernel.c
index bbd4438..ec0e2e8 100644
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
@@ -154,6 +154,17 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value)
if (UNLIKELY((arg_type == GBE_ARG_IMAGE && !mem->is_image)
|| (arg_type != GBE_ARG_IMAGE && mem->is_image)))
return CL_INVALID_ARG_VALUE;
+
+ if(arg_type == GBE_ARG_CONSTANT_PTR) {
+ int32_t cbOffset;
+ cbOffset = gbe_kernel_set_const_buffer_size(k->opaque, index, mem->size);
+ //constant ptr's curbe offset changed, update it
+ if(cbOffset >= 0) {
+ offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index);
+ *((uint32_t *)(k->curbe + offset)) = cbOffset; //cb offset in curbe
+ }
+ }
+
cl_mem_add_ref(mem);
if (k->args[index].mem)
cl_mem_delete(k->args[index].mem);
@@ -177,6 +188,9 @@ cl_kernel_setup(cl_kernel k, gbe_kernel opaque)
cl_context ctx = k->program->ctx;
cl_buffer_mgr bufmgr = cl_context_get_bufmgr(ctx);
+ if(k->bo != NULL)
+ cl_buffer_unreference(k->bo);
+
/* Allocate the gen code here */
const uint32_t code_sz = gbe_kernel_get_code_size(opaque);
const char *code = gbe_kernel_get_code(opaque);
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 4bbaee4..690e5cf 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -141,6 +141,7 @@ cl_mem_allocate(cl_context ctx,
err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
goto error;
}
+ mem->size = sz;
/* Append the buffer in the context buffer list */
pthread_mutex_lock(&ctx->buffer_lock);
@@ -480,6 +481,21 @@ cl_mem_unmap(cl_mem mem)
return CL_SUCCESS;
}
+LOCAL void*
+cl_mem_map_gtt(cl_mem mem)
+{
+ cl_buffer_map_gtt(mem->bo);
+ assert(cl_buffer_get_virtual(mem->bo));
+ return cl_buffer_get_virtual(mem->bo);
+}
+
+LOCAL cl_int
+cl_mem_unmap_gtt(cl_mem mem)
+{
+ cl_buffer_unmap_gtt(mem->bo);
+ return CL_SUCCESS;
+}
+
LOCAL cl_int
cl_mem_pin(cl_mem mem)
{
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 836deb4..99f3835 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -36,6 +36,7 @@ struct _cl_mem {
uint64_t magic; /* To identify it as a memory object */
volatile int ref_n; /* This object is reference counted */
cl_buffer bo; /* Data in GPU memory */
+ size_t size; /* original request size, not alignment size, used in constant buffer */
cl_mem prev, next; /* We chain the memory buffers together */
cl_context ctx; /* Context it belongs to */
cl_mem_flags flags; /* Flags specified at the creation time */
@@ -75,6 +76,12 @@ extern void *cl_mem_map(cl_mem);
/* Unmap a memory object */
extern cl_int cl_mem_unmap(cl_mem);
+/* Directly map a memory object in GTT mode */
+extern void *cl_mem_map_gtt(cl_mem);
+
+/* Unmap a memory object in GTT mode */
+extern cl_int cl_mem_unmap_gtt(cl_mem);
+
/* Pin/unpin the buffer in memory (you must be root) */
extern cl_int cl_mem_pin(cl_mem);
extern cl_int cl_mem_unpin(cl_mem);
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index bed0159..b2e3c97 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -29,6 +29,9 @@ set (utests_sources
compiler_function_argument0.cpp
compiler_function_argument1.cpp
compiler_function_argument.cpp
+ compiler_function_constant0.cpp
+ compiler_function_constant1.cpp
+ compiler_function_constant.cpp
compiler_if_else.cpp
compiler_integer_division.cpp
compiler_integer_remainder.cpp
diff --git a/utests/compiler_copy_image.cpp b/utests/compiler_copy_image.cpp
index 685a189..04c9544 100644
--- a/utests/compiler_copy_image.cpp
+++ b/utests/compiler_copy_image.cpp
@@ -21,8 +21,9 @@ static void compiler_copy_image(void)
desc.image_width = w;
desc.image_height = h;
desc.image_row_pitch = w * sizeof(uint32_t);
-
OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, buf_data[0]);
+
+ desc.image_row_pitch = 0;
OCL_CREATE_IMAGE(buf[1], 0, &format, &desc, NULL);
OCL_CREATE_SAMPLER(sampler, CL_ADDRESS_REPEAT, CL_FILTER_NEAREST);
free(buf_data[0]);
diff --git a/utests/compiler_fill_image.cpp b/utests/compiler_fill_image.cpp
index c9744cc..c9242b2 100644
--- a/utests/compiler_fill_image.cpp
+++ b/utests/compiler_fill_image.cpp
@@ -13,7 +13,7 @@ static void compiler_fill_image(void)
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = w;
desc.image_height = h;
- desc.image_row_pitch = w * sizeof(uint32_t);
+ desc.image_row_pitch = 0;
// Setup kernel and images
OCL_CREATE_KERNEL("test_fill_image");
diff --git a/utests/compiler_fill_image0.cpp b/utests/compiler_fill_image0.cpp
index 2fef90c..cf76be3 100644
--- a/utests/compiler_fill_image0.cpp
+++ b/utests/compiler_fill_image0.cpp
@@ -12,7 +12,7 @@ static void compiler_fill_image0(void)
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = w;
desc.image_height = h;
- desc.image_row_pitch = w * sizeof(uint32_t);
+ desc.image_row_pitch = 0;
// Setup kernel and images
diff --git a/utests/compiler_function_constant.cpp b/utests/compiler_function_constant.cpp
new file mode 100644
index 0000000..20f0ece
--- /dev/null
+++ b/utests/compiler_function_constant.cpp
@@ -0,0 +1,34 @@
+#include "utest_helper.hpp"
+
+void compiler_function_constant(void)
+{
+ const size_t n = 2048;
+ const uint32_t value = 34;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_function_constant");
+ OCL_CREATE_BUFFER(buf[0], 0, 75 * sizeof(short), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(2, sizeof(uint32_t), &value);
+
+ OCL_MAP_BUFFER(0);
+ for(uint32_t i = 0; i < 69; ++i)
+ ((short *)buf_data[0])[i] = i;
+ OCL_UNMAP_BUFFER(0);
+
+ // Run the kernel
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(1);
+
+ // Check results
+ for (uint32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((uint32_t *)buf_data[1])[i] == (value + i%69));
+
+ OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_function_constant);
diff --git a/utests/compiler_function_constant0.cpp b/utests/compiler_function_constant0.cpp
new file mode 100644
index 0000000..de564f3
--- /dev/null
+++ b/utests/compiler_function_constant0.cpp
@@ -0,0 +1,42 @@
+#include "utest_helper.hpp"
+
+void compiler_function_constant0(void)
+{
+ const size_t n = 2048;
+ const uint32_t value = 34;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_function_constant0");
+ OCL_CREATE_BUFFER(buf[0], 0, 75 * sizeof(short), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, 256 * sizeof(char), NULL);
+ OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+ OCL_SET_ARG(3, sizeof(uint32_t), &value);
+
+ OCL_MAP_BUFFER(0);
+ for(uint32_t i = 0; i < 69; ++i)
+ ((short *)buf_data[0])[i] = i;
+ OCL_UNMAP_BUFFER(0);
+
+ OCL_MAP_BUFFER(1);
+ for(uint32_t i = 0; i < 256; ++i)
+ ((char *)buf_data[1])[i] = 10;
+ ((char *)buf_data[1])[15] = 15;
+ OCL_UNMAP_BUFFER(1);
+
+ // Run the kernel
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+ OCL_MAP_BUFFER(2);
+
+ // Check results
+ for (uint32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((uint32_t *)buf_data[2])[i] == (value + 15 + i%69));
+
+ OCL_UNMAP_BUFFER(2);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_function_constant0);
diff --git a/utests/compiler_function_constant1.cpp b/utests/compiler_function_constant1.cpp
new file mode 100644
index 0000000..b92e6ca
--- /dev/null
+++ b/utests/compiler_function_constant1.cpp
@@ -0,0 +1,47 @@
+#include "utest_helper.hpp"
+
+void compiler_function_constant1(void)
+{
+ const size_t n = 2048;
+ const uint32_t value = 34;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_function_constant");
+ OCL_CREATE_BUFFER(buf[0], 0, 75 * sizeof(short), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(2, sizeof(uint32_t), &value);
+
+ OCL_MAP_BUFFER(0);
+ for(uint32_t i = 0; i < 69; ++i)
+ ((short *)buf_data[0])[i] = i;
+ OCL_UNMAP_BUFFER(0);
+
+ // Run the kernel
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ OCL_CREATE_BUFFER(buf[2], 0, 101 * sizeof(short), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[2]);
+ OCL_MAP_BUFFER(2);
+ for(uint32_t i = 0; i < 69; ++i)
+ ((short *)buf_data[2])[i] = 2*i;
+ OCL_UNMAP_BUFFER(2);
+
+ // Run the kernel
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ OCL_MAP_BUFFER(1);
+
+ // Check results
+ for (uint32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((uint32_t *)buf_data[1])[i] == (value + (i%69)*2));
+
+ OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_function_constant1);
diff --git a/utests/compiler_movforphi_undef.cpp b/utests/compiler_movforphi_undef.cpp
index 19e395f..30e53bb 100644
--- a/utests/compiler_movforphi_undef.cpp
+++ b/utests/compiler_movforphi_undef.cpp
@@ -8,13 +8,6 @@ static void compiler_movforphi_undef(void)
cl_image_format format;
cl_image_desc desc;
- format.image_channel_order = CL_RGBA;
- format.image_channel_data_type = CL_UNSIGNED_INT8;
- desc.image_type = CL_MEM_OBJECT_IMAGE2D;
- desc.image_width = w;
- desc.image_height = h;
- desc.image_row_pitch = w * sizeof(uint32_t);
-
// Setup kernel and images
OCL_CREATE_KERNEL("test_movforphi_undef");
buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * w * h);
@@ -22,7 +15,15 @@ static void compiler_movforphi_undef(void)
for (uint32_t i = 0; i < w; i++)
((uint32_t*)buf_data[0])[j * w + i] = j * w + i;
+ format.image_channel_order = CL_RGBA;
+ format.image_channel_data_type = CL_UNSIGNED_INT8;
+ desc.image_type = CL_MEM_OBJECT_IMAGE2D;
+ desc.image_width = w;
+ desc.image_height = h;
+ desc.image_row_pitch = w * sizeof(uint32_t);
OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, buf_data[0]);
+
+ desc.image_row_pitch = 0;
OCL_CREATE_IMAGE(buf[1], 0, &format, &desc, NULL);
OCL_CREATE_SAMPLER(sampler, CL_ADDRESS_REPEAT, CL_FILTER_NEAREST);
free(buf_data[0]);
diff --git a/utests/runtime_flat_address_space.cpp b/utests/runtime_flat_address_space.cpp
index 0357cbd..08167c4 100644
--- a/utests/runtime_flat_address_space.cpp
+++ b/utests/runtime_flat_address_space.cpp
@@ -53,7 +53,7 @@ main(int argc, char *argv[])
NULL);
// Be sure that everything run fine
- dst_buffer = (int *) clIntelMapBuffer(dst[j], &status);
+ dst_buffer = (int *) clMapBufferIntel(dst[j], &status);
if (status != CL_SUCCESS)
goto error;
for (uint32_t i = 0; i < n; ++i)
@@ -61,13 +61,13 @@ main(int argc, char *argv[])
fprintf(stderr, "run-time flat address space failed\n");
exit(-1);
}
- OCL_CALL (clIntelUnmapBuffer, dst[j]);
+ OCL_CALL (clUnmapBufferIntel, dst[j]);
}
for (uint32_t j = 0; j < 24; ++j) OCL_CALL (clReleaseMemObject, dst[j]);
cl_test_destroy();
- printf("%i memory leaks\n", clIntelReportUnfreed());
- assert(clIntelReportUnfreed() == 0);
+ printf("%i memory leaks\n", clReportUnfreedIntel());
+ assert(clReportUnfreedIntel() == 0);
error:
return status;
diff --git a/utests/utest_helper.cpp b/utests/utest_helper.cpp
index b536598..3e73db3 100644
--- a/utests/utest_helper.cpp
+++ b/utests/utest_helper.cpp
@@ -216,7 +216,7 @@ do_kiss_path(const char *file, cl_device_id device)
if (device == NULL)
sub_path = "";
else {
- if (clIntelGetGenVersion(device, &ver) != CL_SUCCESS)
+ if (clGetGenVersionIntel(device, &ver) != CL_SUCCESS)
clpanic("Unable to get Gen version", -1);
sub_path = "";
}
@@ -240,7 +240,7 @@ cl_kernel_init(const char *file_name, const char *kernel_name, int format)
/* Load the program and build it */
ker_path = do_kiss_path(file_name, device);
if (format == LLVM)
- program = clCreateProgramWithLLVM(ctx, 1, &device, ker_path, &status);
+ program = clCreateProgramWithLLVMIntel(ctx, 1, &device, ker_path, &status);
else if (format == SOURCE) {
cl_file_map_t *fm = cl_file_map_new();
FATAL_IF (cl_file_map_open(fm, ker_path) != CL_FILE_MAP_SUCCESS,
@@ -428,8 +428,8 @@ cl_test_destroy(void)
{
cl_kernel_destroy();
cl_ocl_destroy();
- printf("%i memory leaks\n", clIntelReportUnfreed());
- assert(clIntelReportUnfreed() == 0);
+ printf("%i memory leaks\n", clReportUnfreedIntel());
+ assert(clReportUnfreedIntel() == 0);
}
void
@@ -438,7 +438,7 @@ cl_buffer_destroy(void)
int i;
for (i = 0; i < MAX_BUFFER_N; ++i) {
if (buf_data[i] != NULL) {
- clIntelUnmapBuffer(buf[i]);
+ clUnmapBufferIntel(buf[i]);
buf_data[i] = NULL;
}
if (buf[i] != NULL) {
@@ -456,7 +456,7 @@ cl_report_perf_counters(cl_mem perf)
uint32_t i;
if (perf == NULL)
return;
- start = (uint32_t*) clIntelMapBuffer(perf, &status);
+ start = (uint32_t*) clMapBufferIntel(perf, &status);
assert(status == CL_SUCCESS && start != NULL);
end = start + 128;
@@ -481,7 +481,7 @@ cl_report_perf_counters(cl_mem perf)
}
printf("\n\n");
- clIntelUnmapBuffer(perf);
+ clUnmapBufferIntel(perf);
}
struct bmphdr {
diff --git a/utests/utest_helper.hpp b/utests/utest_helper.hpp
index 02249e1..5258416 100644
--- a/utests/utest_helper.hpp
+++ b/utests/utest_helper.hpp
@@ -101,12 +101,12 @@ extern EGLSurface eglSurface;
OCL_CALL2(clCreateSampler, SAMPLER, ctx, 0, ADDRESS_MODE, FILTER_MODE)
#define OCL_MAP_BUFFER(ID) \
- OCL_CALL2(clIntelMapBuffer, buf_data[ID], buf[ID])
+ OCL_CALL2(clMapBufferIntel, buf_data[ID], buf[ID])
#define OCL_UNMAP_BUFFER(ID) \
do { \
if (buf[ID] != NULL) { \
- OCL_CALL (clIntelUnmapBuffer, buf[ID]); \
+ OCL_CALL (clUnmapBufferIntel, buf[ID]); \
buf_data[ID] = NULL; \
} \
} while (0)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/beignet.git
More information about the Pkg-opencl-devel
mailing list