forked from OSchip/llvm-project
[OpenCL] Add missing subgroup builtins
This adds get_kernel_max_sub_group_size_for_ndrange and get_kernel_sub_group_count_for_ndrange. llvm-svn: 309678
This commit is contained in:
parent
91ff5c6d47
commit
fa76b49cef
|
@ -1398,8 +1398,10 @@ LANGBUILTIN(get_pipe_max_packets, "Ui.", "tn", OCLC20_LANG)
|
|||
// OpenCL v2.0 s6.13.17 - Enqueue kernel functions.
|
||||
// A custom builtin check allows special checking of the passed block arguments.
|
||||
LANGBUILTIN(enqueue_kernel, "i.", "tn", OCLC20_LANG)
|
||||
LANGBUILTIN(get_kernel_work_group_size, "i.", "tn", OCLC20_LANG)
|
||||
LANGBUILTIN(get_kernel_preferred_work_group_size_multiple, "i.", "tn", OCLC20_LANG)
|
||||
LANGBUILTIN(get_kernel_work_group_size, "Ui.", "tn", OCLC20_LANG)
|
||||
LANGBUILTIN(get_kernel_preferred_work_group_size_multiple, "Ui.", "tn", OCLC20_LANG)
|
||||
LANGBUILTIN(get_kernel_max_sub_group_size_for_ndrange, "Ui.", "tn", OCLC20_LANG)
|
||||
LANGBUILTIN(get_kernel_sub_group_count_for_ndrange, "Ui.", "tn", OCLC20_LANG)
|
||||
|
||||
// OpenCL v2.0 s6.13.9 - Address space qualifier functions.
|
||||
LANGBUILTIN(to_global, "v*v*", "tn", OCLC20_LANG)
|
||||
|
|
|
@ -2704,6 +2704,25 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
|
|||
"__get_kernel_preferred_work_group_multiple_impl"),
|
||||
Arg));
|
||||
}
|
||||
case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
|
||||
case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
|
||||
llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
|
||||
getContext().getTargetAddressSpace(LangAS::opencl_generic));
|
||||
LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
|
||||
llvm::Value *NDRange = NDRangeL.getAddress().getPointer();
|
||||
Value *Block = EmitScalarExpr(E->getArg(1));
|
||||
Block = Builder.CreatePointerCast(Block, GenericVoidPtrTy);
|
||||
const char *Name =
|
||||
BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
|
||||
? "__get_kernel_max_sub_group_size_for_ndrange_impl"
|
||||
: "__get_kernel_sub_group_count_for_ndrange_impl";
|
||||
return RValue::get(Builder.CreateCall(
|
||||
CGM.CreateRuntimeFunction(
|
||||
llvm::FunctionType::get(
|
||||
IntTy, {NDRange->getType(), GenericVoidPtrTy}, false),
|
||||
Name),
|
||||
{NDRange, Block}));
|
||||
}
|
||||
case Builtin::BIprintf:
|
||||
if (getTarget().getTriple().isNVPTX())
|
||||
return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
|
||||
|
|
|
@ -308,6 +308,32 @@ static bool checkOpenCLSubgroupExt(Sema &S, CallExpr *Call) {
|
|||
return false;
|
||||
}
|
||||
|
||||
/// Check the arguments of the get_kernel_max_sub_group_size_for_ndrange and
/// get_kernel_sub_group_count_for_ndrange builtin functions.
///
/// Both builtins take exactly two arguments: an 'ndrange_t' value followed by
/// a block.
///
/// \param S       Semantic analysis object used to report diagnostics.
/// \param TheCall The builtin call expression being checked.
/// \returns true if the call is ill-formed, false if it is acceptable.
static bool SemaOpenCLBuiltinNDRangeAndBlock(Sema &S, CallExpr *TheCall) {
  if (checkArgCount(S, TheCall, 2))
    return true;

  if (checkOpenCLSubgroupExt(S, TheCall))
    return true;

  // First argument is an ndrange_t type. Compare the unqualified type: the
  // printed name of a qualified type includes its qualifiers, so an operand
  // such as 'const ndrange_t' would otherwise be rejected by mistake.
  Expr *NDRangeArg = TheCall->getArg(0);
  if (NDRangeArg->getType().getUnqualifiedType().getAsString() != "ndrange_t") {
    S.Diag(NDRangeArg->getLocStart(),
           diag::err_opencl_builtin_expected_type)
        << TheCall->getDirectCallee() << "'ndrange_t'";
    return true;
  }

  // Second argument must be a block; its signature is validated separately
  // by checkOpenCLBlockArgs.
  Expr *BlockArg = TheCall->getArg(1);
  if (!isBlockPointer(BlockArg)) {
    S.Diag(BlockArg->getLocStart(),
           diag::err_opencl_builtin_expected_type)
        << TheCall->getDirectCallee() << "block";
    return true;
  }
  return checkOpenCLBlockArgs(S, BlockArg);
}
|
||||
|
||||
/// OpenCL C v2.0, s6.13.17.6 - Check the argument to the
|
||||
/// get_kernel_work_group_size
|
||||
/// and get_kernel_preferred_work_group_size_multiple builtin functions.
|
||||
|
@ -1109,6 +1135,12 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
|
|||
if (SemaOpenCLBuiltinKernelWorkGroupSize(*this, TheCall))
|
||||
return ExprError();
|
||||
break;
|
||||
break;
|
||||
case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
|
||||
case Builtin::BIget_kernel_sub_group_count_for_ndrange:
|
||||
if (SemaOpenCLBuiltinNDRangeAndBlock(*this, TheCall))
|
||||
return ExprError();
|
||||
break;
|
||||
case Builtin::BI__builtin_os_log_format:
|
||||
case Builtin::BI__builtin_os_log_format_buffer_size:
|
||||
if (SemaBuiltinOSLogFormat(TheCall)) {
|
||||
|
|
|
@ -140,4 +140,9 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
|
|||
size = get_kernel_preferred_work_group_size_multiple(block_A);
|
||||
// COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
|
||||
size = get_kernel_preferred_work_group_size_multiple(block_G);
|
||||
|
||||
// COMMON: call i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(%struct.ndrange_t* {{.*}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* {{.*}} to i8 addrspace(1)*) to i8 addrspace(4)*))
|
||||
size = get_kernel_max_sub_group_size_for_ndrange(ndrange, ^(){});
|
||||
// COMMON: call i32 @__get_kernel_sub_group_count_for_ndrange_impl(%struct.ndrange_t* {{.*}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* {{.*}} to i8 addrspace(1)*) to i8 addrspace(4)*))
|
||||
size = get_kernel_sub_group_count_for_ndrange(ndrange, ^(){});
|
||||
}
|
||||
|
|
|
@ -209,3 +209,35 @@ kernel void work_group_size_tests() {
|
|||
size = get_kernel_preferred_work_group_size_multiple(1); // expected-error{{expected block argument}}
|
||||
size = get_kernel_preferred_work_group_size_multiple(block_A, 1); // expected-error{{too many arguments to function call, expected 1, have 2}}
|
||||
}
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_subgroups : enable
|
||||
|
||||
// Exercises get_kernel_max_sub_group_size_for_ndrange after the
// cl_khr_subgroups enable pragma above: one call with an (ndrange_t, block)
// pair carrying no expected diagnostic, then one call per argument position
// with an integer substituted, each annotated with the expected error.
kernel void foo(global int *buf)
|
||||
{
|
||||
ndrange_t n;
|
||||
buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){});
|
||||
buf[0] = get_kernel_max_sub_group_size_for_ndrange(0, ^(){}); // expected-error{{illegal call to 'get_kernel_max_sub_group_size_for_ndrange', expected 'ndrange_t' argument type}}
|
||||
buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, 1); // expected-error{{illegal call to 'get_kernel_max_sub_group_size_for_ndrange', expected block argument type}}
|
||||
}
|
||||
|
||||
// Exercises get_kernel_sub_group_count_for_ndrange after the
// cl_khr_subgroups enable pragma above: one call with an (ndrange_t, block)
// pair carrying no expected diagnostic, then one call per argument position
// with an integer substituted, each annotated with the expected error.
kernel void bar(global int *buf)
|
||||
{
|
||||
ndrange_t n;
|
||||
buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){});
|
||||
buf[0] = get_kernel_sub_group_count_for_ndrange(0, ^(){}); // expected-error{{illegal call to 'get_kernel_sub_group_count_for_ndrange', expected 'ndrange_t' argument type}}
|
||||
buf[0] = get_kernel_sub_group_count_for_ndrange(n, 1); // expected-error{{illegal call to 'get_kernel_sub_group_count_for_ndrange', expected block argument type}}
|
||||
}
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_subgroups : disable
|
||||
|
||||
// Placed after the cl_khr_subgroups disable pragma above: an otherwise
// well-formed call to get_kernel_max_sub_group_size_for_ndrange is annotated
// with the expected "requires cl_khr_subgroups extension" error.
kernel void foo1(global int *buf)
|
||||
{
|
||||
ndrange_t n;
|
||||
buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_max_sub_group_size_for_ndrange' requires cl_khr_subgroups extension to be enabled}}
|
||||
}
|
||||
|
||||
// Placed after the cl_khr_subgroups disable pragma above: an otherwise
// well-formed call to get_kernel_sub_group_count_for_ndrange is annotated
// with the expected "requires cl_khr_subgroups extension" error.
kernel void bar1(global int *buf)
|
||||
{
|
||||
ndrange_t n;
|
||||
buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_sub_group_count_for_ndrange' requires cl_khr_subgroups extension to be enabled}}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue