forked from OSchip/llvm-project
[OpenCL] Add missing address spaces in IR generation of blocks
Modify the ObjC blocks implementation with respect to address spaces as follows: - keep the default private address space for blocks generated as local variables (with captures); - add the global address space for global block literals (no captures); - make the block invoke function and the enqueue_kernel prototype take a generic AS block pointer parameter to accommodate both the private and global AS cases from above; - add block handling into default AS because a block is implemented as a special pointer type (BlockPointer) in the frontend and is therefore used as a pointer everywhere. This is also needed to accommodate both private and global AS blocks for the two cases above; - remove ObjC RT specific symbols (NSConcreteStackBlock and NSConcreteGlobalBlock) in OpenCL mode. Review: https://reviews.llvm.org/D28814 llvm-svn: 293286
This commit is contained in:
parent
9e962add70
commit
af0a7bbbe2
|
@ -1571,8 +1571,9 @@ bool CastExpr::CastConsistency() const {
|
|||
goto CheckNoBasePath;
|
||||
|
||||
case CK_AddressSpaceConversion:
|
||||
assert(getType()->isPointerType());
|
||||
assert(getSubExpr()->getType()->isPointerType());
|
||||
assert(getType()->isPointerType() || getType()->isBlockPointerType());
|
||||
assert(getSubExpr()->getType()->isPointerType() ||
|
||||
getSubExpr()->getType()->isBlockPointerType());
|
||||
assert(getType()->getPointeeType().getAddressSpace() !=
|
||||
getSubExpr()->getType()->getPointeeType().getAddressSpace());
|
||||
// These should not have an inheritance path.
|
||||
|
|
|
@ -718,7 +718,12 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
|
|||
|
||||
// Otherwise, we have to emit this as a local block.
|
||||
|
||||
llvm::Constant *isa = CGM.getNSConcreteStackBlock();
|
||||
llvm::Constant *isa =
|
||||
(!CGM.getContext().getLangOpts().OpenCL)
|
||||
? CGM.getNSConcreteStackBlock()
|
||||
: CGM.getNullPointer(cast<llvm::PointerType>(
|
||||
CGM.getNSConcreteStackBlock()->getType()),
|
||||
QualType(getContext().VoidPtrTy));
|
||||
isa = llvm::ConstantExpr::getBitCast(isa, VoidPtrTy);
|
||||
|
||||
// Build the block descriptor.
|
||||
|
@ -906,9 +911,8 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
|
|||
|
||||
// Cast to the converted block-pointer type, which happens (somewhat
|
||||
// unfortunately) to be a pointer to function type.
|
||||
llvm::Value *result =
|
||||
Builder.CreateBitCast(blockAddr.getPointer(),
|
||||
ConvertType(blockInfo.getBlockExpr()->getType()));
|
||||
llvm::Value *result = Builder.CreatePointerCast(
|
||||
blockAddr.getPointer(), ConvertType(blockInfo.getBlockExpr()->getType()));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@ -976,21 +980,41 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
|
|||
llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
|
||||
|
||||
// Get a pointer to the generic block literal.
|
||||
// For OpenCL we generate generic AS void ptr to be able to reuse the same
|
||||
// block definition for blocks with captures generated as private AS local
|
||||
// variables and without captures generated as global AS program scope
|
||||
// variables.
|
||||
unsigned AddrSpace = 0;
|
||||
if (getLangOpts().OpenCL)
|
||||
AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic);
|
||||
|
||||
llvm::Type *BlockLiteralTy =
|
||||
llvm::PointerType::getUnqual(CGM.getGenericBlockLiteralType());
|
||||
llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace);
|
||||
|
||||
// Bitcast the callee to a block literal.
|
||||
BlockPtr = Builder.CreateBitCast(BlockPtr, BlockLiteralTy, "block.literal");
|
||||
BlockPtr =
|
||||
Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
|
||||
|
||||
// Get the function pointer from the literal.
|
||||
llvm::Value *FuncPtr =
|
||||
Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3);
|
||||
|
||||
BlockPtr = Builder.CreateBitCast(BlockPtr, VoidPtrTy);
|
||||
|
||||
// Add the block literal.
|
||||
CallArgList Args;
|
||||
Args.add(RValue::get(BlockPtr), getContext().VoidPtrTy);
|
||||
|
||||
QualType VoidPtrQualTy = getContext().VoidPtrTy;
|
||||
llvm::Type *GenericVoidPtrTy = VoidPtrTy;
|
||||
if (getLangOpts().OpenCL) {
|
||||
GenericVoidPtrTy = Builder.getInt8PtrTy(
|
||||
getContext().getTargetAddressSpace(LangAS::opencl_generic));
|
||||
VoidPtrQualTy =
|
||||
getContext().getPointerType(getContext().getAddrSpaceQualType(
|
||||
getContext().VoidTy, LangAS::opencl_generic));
|
||||
}
|
||||
|
||||
BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy);
|
||||
Args.add(RValue::get(BlockPtr), VoidPtrQualTy);
|
||||
|
||||
QualType FnType = BPT->getPointeeType();
|
||||
|
||||
|
@ -1097,7 +1121,12 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
|
|||
auto fields = builder.beginStruct();
|
||||
|
||||
// isa
|
||||
fields.add(CGM.getNSConcreteGlobalBlock());
|
||||
fields.add(
|
||||
(!CGM.getContext().getLangOpts().OpenCL)
|
||||
? CGM.getNSConcreteGlobalBlock()
|
||||
: CGM.getNullPointer(cast<llvm::PointerType>(
|
||||
CGM.getNSConcreteGlobalBlock()->getType()),
|
||||
QualType(CGM.getContext().VoidPtrTy)));
|
||||
|
||||
// __flags
|
||||
BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE;
|
||||
|
@ -1114,16 +1143,19 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
|
|||
// Descriptor
|
||||
fields.add(buildBlockDescriptor(CGM, blockInfo));
|
||||
|
||||
llvm::Constant *literal =
|
||||
fields.finishAndCreateGlobal("__block_literal_global",
|
||||
blockInfo.BlockAlign,
|
||||
/*constant*/ true);
|
||||
unsigned AddrSpace = 0;
|
||||
if (CGM.getContext().getLangOpts().OpenCL)
|
||||
AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_global);
|
||||
|
||||
llvm::Constant *literal = fields.finishAndCreateGlobal(
|
||||
"__block_literal_global", blockInfo.BlockAlign,
|
||||
/*constant*/ true, llvm::GlobalVariable::InternalLinkage, AddrSpace);
|
||||
|
||||
// Return a constant of the appropriately-casted type.
|
||||
llvm::Type *RequiredType =
|
||||
CGM.getTypes().ConvertType(blockInfo.getBlockExpr()->getType());
|
||||
llvm::Constant *Result =
|
||||
llvm::ConstantExpr::getBitCast(literal, RequiredType);
|
||||
llvm::ConstantExpr::getPointerCast(literal, RequiredType);
|
||||
CGM.setAddrOfGlobalBlock(blockInfo.BlockExpression, Result);
|
||||
return Result;
|
||||
}
|
||||
|
@ -1155,9 +1187,13 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D,
|
|||
|
||||
// Instead of messing around with LocalDeclMap, just set the value
|
||||
// directly as BlockPointer.
|
||||
BlockPointer = Builder.CreateBitCast(arg,
|
||||
BlockInfo->StructureType->getPointerTo(),
|
||||
"block");
|
||||
BlockPointer = Builder.CreatePointerCast(
|
||||
arg,
|
||||
BlockInfo->StructureType->getPointerTo(
|
||||
getContext().getLangOpts().OpenCL
|
||||
? getContext().getTargetAddressSpace(LangAS::opencl_generic)
|
||||
: 0),
|
||||
"block");
|
||||
}
|
||||
|
||||
Address CodeGenFunction::LoadBlockStruct() {
|
||||
|
@ -1196,6 +1232,15 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD,
|
|||
// The first argument is the block pointer. Just take it as a void*
|
||||
// and cast it later.
|
||||
QualType selfTy = getContext().VoidPtrTy;
|
||||
|
||||
// For OpenCL, the passed block pointer can be a private AS local variable or a
|
||||
// global AS program scope variable (for the case with and without captures).
|
||||
// Generic AS is used therefore to be able to accommodate both private and
|
||||
// global AS in one implementation.
|
||||
if (getLangOpts().OpenCL)
|
||||
selfTy = getContext().getPointerType(getContext().getAddrSpaceQualType(
|
||||
getContext().VoidTy, LangAS::opencl_generic));
|
||||
|
||||
IdentifierInfo *II = &CGM.getContext().Idents.get(".block_descriptor");
|
||||
|
||||
ImplicitParamDecl selfDecl(getContext(), const_cast<BlockDecl*>(blockDecl),
|
||||
|
|
|
@ -2493,6 +2493,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
|
|||
|
||||
llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
|
||||
llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy);
|
||||
llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
|
||||
getContext().getTargetAddressSpace(LangAS::opencl_generic));
|
||||
|
||||
llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
|
||||
llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
|
||||
|
@ -2502,12 +2504,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
|
|||
// The most basic form of the call with parameters:
|
||||
// queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
|
||||
Name = "__enqueue_kernel_basic";
|
||||
llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy};
|
||||
llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy};
|
||||
llvm::FunctionType *FTy = llvm::FunctionType::get(
|
||||
Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
|
||||
|
||||
llvm::Value *Block =
|
||||
Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
|
||||
llvm::Value *Block = Builder.CreatePointerCast(
|
||||
EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
|
||||
|
||||
return RValue::get(Builder.CreateCall(
|
||||
CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block}));
|
||||
|
@ -2518,14 +2520,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
|
|||
if (E->getArg(3)->getType()->isBlockPointerType()) {
|
||||
// No events passed, but has variadic arguments.
|
||||
Name = "__enqueue_kernel_vaargs";
|
||||
llvm::Value *Block =
|
||||
Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
|
||||
llvm::Value *Block = Builder.CreatePointerCast(
|
||||
EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
|
||||
// Create a vector of the arguments, as well as a constant value to
|
||||
// express to the runtime the number of variadic arguments.
|
||||
std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
|
||||
ConstantInt::get(IntTy, NumArgs - 4)};
|
||||
std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy,
|
||||
IntTy};
|
||||
std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy,
|
||||
GenericVoidPtrTy, IntTy};
|
||||
|
||||
// Each of the following arguments specifies the size of the corresponding
|
||||
// argument passed to the enqueued block.
|
||||
|
@ -2555,12 +2557,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
|
|||
// Convert to generic address space.
|
||||
EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
|
||||
ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
|
||||
llvm::Value *Block =
|
||||
Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy);
|
||||
llvm::Value *Block = Builder.CreatePointerCast(
|
||||
EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy);
|
||||
|
||||
std::vector<llvm::Type *> ArgTys = {QueueTy, Int32Ty, RangeTy,
|
||||
Int32Ty, EventPtrTy, EventPtrTy,
|
||||
Int8PtrTy};
|
||||
std::vector<llvm::Type *> ArgTys = {
|
||||
QueueTy, Int32Ty, RangeTy, Int32Ty,
|
||||
EventPtrTy, EventPtrTy, GenericVoidPtrTy};
|
||||
|
||||
std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents,
|
||||
EventList, ClkEvent, Block};
|
||||
|
@ -2596,20 +2598,24 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
|
|||
// OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
|
||||
// parameter.
|
||||
case Builtin::BIget_kernel_work_group_size: {
|
||||
llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
|
||||
getContext().getTargetAddressSpace(LangAS::opencl_generic));
|
||||
Value *Arg = EmitScalarExpr(E->getArg(0));
|
||||
Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
|
||||
return RValue::get(
|
||||
Builder.CreateCall(CGM.CreateRuntimeFunction(
|
||||
llvm::FunctionType::get(IntTy, Int8PtrTy, false),
|
||||
"__get_kernel_work_group_size_impl"),
|
||||
Arg));
|
||||
}
|
||||
case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
|
||||
Value *Arg = EmitScalarExpr(E->getArg(0));
|
||||
Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
|
||||
Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
|
||||
return RValue::get(Builder.CreateCall(
|
||||
CGM.CreateRuntimeFunction(
|
||||
llvm::FunctionType::get(IntTy, Int8PtrTy, false),
|
||||
llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
|
||||
"__get_kernel_work_group_size_impl"),
|
||||
Arg));
|
||||
}
|
||||
case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
|
||||
llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
|
||||
getContext().getTargetAddressSpace(LangAS::opencl_generic));
|
||||
Value *Arg = EmitScalarExpr(E->getArg(0));
|
||||
Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
|
||||
return RValue::get(Builder.CreateCall(
|
||||
CGM.CreateRuntimeFunction(
|
||||
llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
|
||||
"__get_kernel_preferred_work_group_multiple_impl"),
|
||||
Arg));
|
||||
}
|
||||
|
|
|
@ -7405,7 +7405,13 @@ checkBlockPointerTypesForAssignment(Sema &S, QualType LHSType,
|
|||
Sema::AssignConvertType ConvTy = Sema::Compatible;
|
||||
|
||||
// For blocks we enforce that qualifiers are identical.
|
||||
if (lhptee.getLocalQualifiers() != rhptee.getLocalQualifiers())
|
||||
Qualifiers LQuals = lhptee.getLocalQualifiers();
|
||||
Qualifiers RQuals = rhptee.getLocalQualifiers();
|
||||
if (S.getLangOpts().OpenCL) {
|
||||
LQuals.removeAddressSpace();
|
||||
RQuals.removeAddressSpace();
|
||||
}
|
||||
if (LQuals != RQuals)
|
||||
ConvTy = Sema::CompatiblePointerDiscardsQualifiers;
|
||||
|
||||
if (!S.Context.typesAreBlockPointerCompatible(LHSType, RHSType))
|
||||
|
@ -7630,7 +7636,12 @@ Sema::CheckAssignmentConstraints(QualType LHSType, ExprResult &RHS,
|
|||
// U^ -> void*
|
||||
if (RHSType->getAs<BlockPointerType>()) {
|
||||
if (LHSPointer->getPointeeType()->isVoidType()) {
|
||||
Kind = CK_BitCast;
|
||||
unsigned AddrSpaceL = LHSPointer->getPointeeType().getAddressSpace();
|
||||
unsigned AddrSpaceR = RHSType->getAs<BlockPointerType>()
|
||||
->getPointeeType()
|
||||
.getAddressSpace();
|
||||
Kind =
|
||||
AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast;
|
||||
return Compatible;
|
||||
}
|
||||
}
|
||||
|
@ -7642,7 +7653,13 @@ Sema::CheckAssignmentConstraints(QualType LHSType, ExprResult &RHS,
|
|||
if (isa<BlockPointerType>(LHSType)) {
|
||||
// U^ -> T^
|
||||
if (RHSType->isBlockPointerType()) {
|
||||
Kind = CK_BitCast;
|
||||
unsigned AddrSpaceL = LHSType->getAs<BlockPointerType>()
|
||||
->getPointeeType()
|
||||
.getAddressSpace();
|
||||
unsigned AddrSpaceR = RHSType->getAs<BlockPointerType>()
|
||||
->getPointeeType()
|
||||
.getAddressSpace();
|
||||
Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast;
|
||||
return checkBlockPointerTypesForAssignment(*this, LHSType, RHSType);
|
||||
}
|
||||
|
||||
|
|
|
@ -6942,8 +6942,10 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
|
|||
(TAL == TAL_DeclSpec || TAL == TAL_DeclChunk)) {
|
||||
Declarator &D = state.getDeclarator();
|
||||
if (state.getCurrentChunkIndex() > 0 &&
|
||||
D.getTypeObject(state.getCurrentChunkIndex() - 1).Kind ==
|
||||
DeclaratorChunk::Pointer) {
|
||||
(D.getTypeObject(state.getCurrentChunkIndex() - 1).Kind ==
|
||||
DeclaratorChunk::Pointer ||
|
||||
D.getTypeObject(state.getCurrentChunkIndex() - 1).Kind ==
|
||||
DeclaratorChunk::BlockPointer)) {
|
||||
type = state.getSema().Context.getAddrSpaceQualType(
|
||||
type, LangAS::opencl_generic);
|
||||
} else if (state.getCurrentChunkIndex() == 0 &&
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -triple spir-unknown-unknown | FileCheck %s --check-prefix=COMMON
|
||||
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -triple amdgcn-amd-amdhsa-opencl | FileCheck %s --check-prefix=AMD
|
||||
|
||||
// Checking for null instead of @__NSConcreteGlobalBlock symbol
|
||||
// COMMON: @__block_literal_global = internal addrspace(1) constant { i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } { i8** null
|
||||
// AMD: @__block_literal_global = internal addrspace(1) constant { i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } { i8** addrspacecast (i8* addrspace(4)* null to i8**)
|
||||
void (^block_A)(local void *) = ^(local void *a) {
|
||||
return;
|
||||
};
|
||||
|
||||
void foo(){
|
||||
int i;
|
||||
// Checking for null instead of @_NSConcreteStackBlock symbol
|
||||
// COMMON: store i8* null, i8** %block.isa
|
||||
// AMD: store i8* addrspacecast (i8 addrspace(4)* null to i8*), i8** %block.isa,
|
||||
int (^ block_B)(void) = ^{
|
||||
return i;
|
||||
};
|
||||
}
|
|
@ -2,9 +2,8 @@
|
|||
// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=COMMON --check-prefix=B64
|
||||
|
||||
typedef void (^bl_t)(local void *);
|
||||
|
||||
// N.B. The check here only exists to set BL_GLOBAL
|
||||
// COMMON: @block_G = {{.*}}bitcast ([[BL_GLOBAL:[^@]+@__block_literal_global(\.[0-9]+)?]]
|
||||
// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_GLOBAL:@__block_literal_global(\.[0-9]+)?]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*)
|
||||
const bl_t block_G = (bl_t) ^ (local void *a) {};
|
||||
|
||||
kernel void device_side_enqueue(global int *a, global int *b, int i) {
|
||||
|
@ -25,8 +24,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
|
|||
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
|
||||
// COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
|
||||
// COMMON: [[BL:%[0-9]+]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor addrspace(2)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block to void ()*
|
||||
// COMMON: [[BL_I8:%[0-9]+]] = bitcast void ()* [[BL]] to i8*
|
||||
// COMMON: call i32 @__enqueue_kernel_basic(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* [[BL_I8]])
|
||||
// COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
|
||||
// COMMON: call i32 @__enqueue_kernel_basic(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8 addrspace(4)* [[BL_I8]])
|
||||
enqueue_kernel(default_queue, flags, ndrange,
|
||||
^(void) {
|
||||
a[i] = b[i];
|
||||
|
@ -38,8 +37,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
|
|||
// COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)*
|
||||
// COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
|
||||
// COMMON: [[BL:%[0-9]+]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor addrspace(2)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()*
|
||||
// COMMON: [[BL_I8:%[0-9]+]] = bitcast void ()* [[BL]] to i8*
|
||||
// COMMON: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], i8* [[BL_I8]])
|
||||
// COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
|
||||
// COMMON: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], i8 addrspace(4)* [[BL_I8]])
|
||||
enqueue_kernel(default_queue, flags, ndrange, 2, &event_wait_list, &clk_event,
|
||||
^(void) {
|
||||
a[i] = b[i];
|
||||
|
@ -48,8 +47,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
|
|||
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
|
||||
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
|
||||
// COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
|
||||
// B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 256)
|
||||
// B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 256)
|
||||
// B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32 256)
|
||||
// B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64 256)
|
||||
enqueue_kernel(default_queue, flags, ndrange,
|
||||
^(local void *p) {
|
||||
return;
|
||||
|
@ -60,9 +59,9 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
|
|||
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
|
||||
// COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
|
||||
// B32: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i32
|
||||
// B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]])
|
||||
// B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32 [[SIZE]])
|
||||
// B64: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i64
|
||||
// B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 [[SIZE]])
|
||||
// B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64 [[SIZE]])
|
||||
enqueue_kernel(default_queue, flags, ndrange,
|
||||
^(local void *p) {
|
||||
return;
|
||||
|
@ -75,8 +74,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
|
|||
// COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
|
||||
// COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)*
|
||||
// COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
|
||||
// B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t{{.*}}* addrspace(4)*, %opencl.clk_event_t{{.*}}* addrspace(4)*, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 256)
|
||||
// B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t{{.*}}* addrspace(4)*, %opencl.clk_event_t{{.*}}* addrspace(4)*, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 256)
|
||||
// B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t{{.*}}* addrspace(4)*, %opencl.clk_event_t{{.*}}* addrspace(4)*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32 256)
|
||||
// B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t{{.*}}* addrspace(4)*, %opencl.clk_event_t{{.*}}* addrspace(4)*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64 256)
|
||||
enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
|
||||
^(local void *p) {
|
||||
return;
|
||||
|
@ -90,9 +89,9 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
|
|||
// COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)*
|
||||
// COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
|
||||
// B32: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i32
|
||||
// B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t{{.*}}* addrspace(4)*, %opencl.clk_event_t{{.*}}* addrspace(4)*, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]])
|
||||
// B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t{{.*}}* addrspace(4)*, %opencl.clk_event_t{{.*}}* addrspace(4)*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32 [[SIZE]])
|
||||
// B64: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i64
|
||||
// B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t{{.*}}* addrspace(4)*, %opencl.clk_event_t{{.*}}* addrspace(4)*, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 [[SIZE]])
|
||||
// B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t{{.*}}* addrspace(4)*, %opencl.clk_event_t{{.*}}* addrspace(4)*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64 [[SIZE]])
|
||||
enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
|
||||
^(local void *p) {
|
||||
return;
|
||||
|
@ -104,9 +103,9 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
|
|||
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
|
||||
// COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
|
||||
// B32: [[SIZE:%[0-9]+]] = trunc i64 {{%[0-9]+}} to i32
|
||||
// B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]])
|
||||
// B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32 [[SIZE]])
|
||||
// B64: [[SIZE:%[0-9]+]] = load i64, i64* %l
|
||||
// B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 [[SIZE]])
|
||||
// B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64 [[SIZE]])
|
||||
enqueue_kernel(default_queue, flags, ndrange,
|
||||
^(local void *p) {
|
||||
return;
|
||||
|
@ -116,8 +115,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
|
|||
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
|
||||
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
|
||||
// COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
|
||||
// B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 0)
|
||||
// B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 4294967296)
|
||||
// B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32 0)
|
||||
// B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64 4294967296)
|
||||
enqueue_kernel(default_queue, flags, ndrange,
|
||||
^(local void *p) {
|
||||
return;
|
||||
|
@ -126,22 +125,22 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
|
|||
|
||||
// The full type of each of these expressions is long (and repeated elsewhere), so we
|
||||
// capture it as part of the regex for convenience and clarity.
|
||||
// COMMON: store void ()* bitcast ([[BL_A:[^@]+@__block_literal_global.[0-9]+]] to void ()*), void ()** %block_A
|
||||
// COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_A:@__block_literal_global(\.[0-9]+)?]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A
|
||||
void (^const block_A)(void) = ^{
|
||||
return;
|
||||
};
|
||||
|
||||
// COMMON: store void (i8 addrspace(3)*)* bitcast ([[BL_B:[^@]+@__block_literal_global.[0-9]+]] to void (i8 addrspace(3)*)*), void (i8 addrspace(3)*)** %block_B
|
||||
// COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_B:@__block_literal_global(\.[0-9]+)?]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B
|
||||
void (^const block_B)(local void *) = ^(local void *a) {
|
||||
return;
|
||||
};
|
||||
|
||||
// COMMON: call i32 @__get_kernel_work_group_size_impl(i8* bitcast ([[BL_A]] to i8*))
|
||||
// COMMON: call i32 @__get_kernel_work_group_size_impl(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_A]] to i8 addrspace(1)*) to i8 addrspace(4)*))
|
||||
unsigned size = get_kernel_work_group_size(block_A);
|
||||
// COMMON: call i32 @__get_kernel_work_group_size_impl(i8* bitcast ([[BL_B]] to i8*))
|
||||
// COMMON: call i32 @__get_kernel_work_group_size_impl(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_B]] to i8 addrspace(1)*) to i8 addrspace(4)*))
|
||||
size = get_kernel_work_group_size(block_B);
|
||||
// COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8* bitcast ([[BL_A]] to i8*))
|
||||
// COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_A]] to i8 addrspace(1)*) to i8 addrspace(4)*))
|
||||
size = get_kernel_preferred_work_group_size_multiple(block_A);
|
||||
// COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8* bitcast ([[BL_GLOBAL]] to i8*))
|
||||
// COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
|
||||
size = get_kernel_preferred_work_group_size_multiple(block_G);
|
||||
}
|
||||
|
|
|
@ -4,26 +4,34 @@
|
|||
void f0(int (^const bl)());
|
||||
// All block declarations must be const qualified and initialized.
|
||||
void f1() {
|
||||
int (^bl1)() = ^() {return 1;};
|
||||
int (^const bl2)() = ^(){return 1;};
|
||||
int (^bl1)(void) = ^() {
|
||||
return 1;
|
||||
};
|
||||
int (^const bl2)(void) = ^() {
|
||||
return 1;
|
||||
};
|
||||
f0(bl1);
|
||||
f0(bl2);
|
||||
bl1 = bl2; // expected-error{{invalid operands to binary expression ('int (^const)()' and 'int (^const)()')}}
|
||||
bl1 = bl2; // expected-error{{invalid operands to binary expression ('int (__generic ^const)(void)' and 'int (__generic ^const)(void)')}}
|
||||
int (^const bl3)(); // expected-error{{invalid block variable declaration - must be initialized}}
|
||||
}
|
||||
|
||||
// A block with extern storage class is not allowed.
|
||||
extern int (^bl)() = ^(){return 1;}; // expected-error{{invalid block variable declaration - using 'extern' storage class is disallowed}}
|
||||
extern int (^bl)(void) = ^() { // expected-error{{invalid block variable declaration - using 'extern' storage class is disallowed}}
|
||||
return 1;
|
||||
};
|
||||
void f2() {
|
||||
extern int (^bl)() = ^(){return 1;}; // expected-error{{invalid block variable declaration - using 'extern' storage class is disallowed}}
|
||||
extern int (^bl)(void) = ^() { // expected-error{{invalid block variable declaration - using 'extern' storage class is disallowed}}
|
||||
return 1;
|
||||
};
|
||||
}
|
||||
|
||||
// A block cannot be the return value of a function.
|
||||
typedef int (^bl_t)(void);
|
||||
bl_t f3(bl_t bl); // expected-error{{declaring function return value of type 'bl_t' (aka 'int (^const)(void)') is not allowed}}
|
||||
bl_t f3(bl_t bl); // expected-error{{declaring function return value of type 'bl_t' (aka 'int (__generic ^const)(void)') is not allowed}}
|
||||
|
||||
struct bl_s {
|
||||
int (^bl)(void); // expected-error {{the 'int (^const)(void)' type cannot be used to declare a structure or union field}}
|
||||
int (^bl)(void); // expected-error {{the 'int (__generic ^const)(void)' type cannot be used to declare a structure or union field}}
|
||||
};
|
||||
|
||||
void f4() {
|
||||
|
@ -45,16 +53,16 @@ void f5(int i) {
|
|||
bl2_t bl2 = ^(int i) {
|
||||
return 2;
|
||||
};
|
||||
bl2_t arr[] = {bl1, bl2}; // expected-error {{array of 'bl2_t' (aka 'int (^const)(int)') type is invalid in OpenCL}}
|
||||
bl2_t arr[] = {bl1, bl2}; // expected-error {{array of 'bl2_t' (aka 'int (__generic ^const)(int)') type is invalid in OpenCL}}
|
||||
int tmp = i ? bl1(i) // expected-error {{block type cannot be used as expression in ternary expression in OpenCL}}
|
||||
: bl2(i); // expected-error {{block type cannot be used as expression in ternary expression in OpenCL}}
|
||||
}
|
||||
// A block pointer type and all pointer operations are disallowed
|
||||
void f6(bl2_t *bl_ptr) { // expected-error{{pointer to type '__generic bl2_t' (aka 'int (^const __generic)(int)') is invalid in OpenCL}}
|
||||
void f6(bl2_t *bl_ptr) { // expected-error{{pointer to type '__generic bl2_t' (aka 'int (__generic ^const __generic)(int)') is invalid in OpenCL}}
|
||||
bl2_t bl = ^(int i) {
|
||||
return 1;
|
||||
};
|
||||
bl2_t *p; // expected-error {{pointer to type '__generic bl2_t' (aka 'int (^const __generic)(int)') is invalid in OpenCL}}
|
||||
*bl; // expected-error {{invalid argument type 'bl2_t' (aka 'int (^const)(int)') to unary expression}}
|
||||
&bl; // expected-error {{invalid argument type 'bl2_t' (aka 'int (^const)(int)') to unary expression}}
|
||||
bl2_t *p; // expected-error {{pointer to type '__generic bl2_t' (aka 'int (__generic ^const __generic)(int)') is invalid in OpenCL}}
|
||||
*bl; // expected-error {{invalid argument type 'bl2_t' (aka 'int (__generic ^const)(int)') to unary expression}}
|
||||
&bl; // expected-error {{invalid argument type 'bl2_t' (aka 'int (__generic ^const)(int)') to unary expression}}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue