forked from OSchip/llvm-project
[cuda] Allow using integral non-type template parameters as launch_bounds attribute arguments.
- Changed CUDALaunchBounds arguments from integers to Expr* so they can be saved in AST for instantiation. - Added support for template instantiation of launch_bounds attrubute. - Moved evaluation of launch_bounds arguments to NVPTXTargetCodeGenInfo:: SetTargetAttributes() where it can be done after template instantiation. - Added a warning on negative launch_bounds arguments. - Amended test cases. Differential Revision: http://reviews.llvm.org/D8985 llvm-svn: 235452
This commit is contained in:
parent
6e3344ed98
commit
7093e40641
|
@ -581,7 +581,7 @@ def CUDAInvalidTarget : InheritableAttr {
|
|||
|
||||
def CUDALaunchBounds : InheritableAttr {
|
||||
let Spellings = [GNU<"launch_bounds">];
|
||||
let Args = [IntArgument<"MaxThreads">, DefaultIntArgument<"MinBlocks", 0>];
|
||||
let Args = [ExprArgument<"MaxThreads">, ExprArgument<"MinBlocks", 1>];
|
||||
let LangOpts = [CUDA];
|
||||
let Subjects = SubjectList<[ObjCMethod, FunctionLike], WarnDiag,
|
||||
"ExpectedFunctionOrMethod">;
|
||||
|
|
|
@ -2120,6 +2120,9 @@ def note_objc_literal_comparison_isequal : Note<
|
|||
"use 'isEqual:' instead">;
|
||||
def err_attribute_argument_is_zero : Error<
|
||||
"%0 attribute must be greater than 0">;
|
||||
def warn_attribute_argument_n_negative : Warning<
|
||||
"%0 attribute parameter %1 is negative and will be ignored">,
|
||||
InGroup<CudaCompat>;
|
||||
def err_property_function_in_objc_container : Error<
|
||||
"use of Objective-C property in function nested in Objective-C "
|
||||
"container not supported, move function outside its container">;
|
||||
|
|
|
@ -7393,6 +7393,11 @@ public:
|
|||
void AddAlignValueAttr(SourceRange AttrRange, Decl *D, Expr *E,
|
||||
unsigned SpellingListIndex);
|
||||
|
||||
/// AddLaunchBoundsAttr - Adds a launch_bounds attribute to a particular
|
||||
/// declaration.
|
||||
void AddLaunchBoundsAttr(SourceRange AttrRange, Decl *D, Expr *MaxThreads,
|
||||
Expr *MinBlocks, unsigned SpellingListIndex);
|
||||
|
||||
// OpenMP directives and clauses.
|
||||
private:
|
||||
void *VarDataSharingAttributesStack;
|
||||
|
|
|
@ -5101,18 +5101,22 @@ SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
|
|||
// Create !{<func-ref>, metadata !"kernel", i32 1} node
|
||||
addNVVMMetadata(F, "kernel", 1);
|
||||
}
|
||||
if (FD->hasAttr<CUDALaunchBoundsAttr>()) {
|
||||
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) {
|
||||
// Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
|
||||
addNVVMMetadata(F, "maxntidx",
|
||||
FD->getAttr<CUDALaunchBoundsAttr>()->getMaxThreads());
|
||||
// min blocks is a default argument for CUDALaunchBoundsAttr, so getting a
|
||||
// zero value from getMinBlocks either means it was not specified in
|
||||
// __launch_bounds__ or the user specified a 0 value. In both cases, we
|
||||
// don't have to add a PTX directive.
|
||||
int MinCTASM = FD->getAttr<CUDALaunchBoundsAttr>()->getMinBlocks();
|
||||
if (MinCTASM > 0) {
|
||||
// Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
|
||||
addNVVMMetadata(F, "minctasm", MinCTASM);
|
||||
llvm::APSInt MaxThreads(32);
|
||||
MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext());
|
||||
if (MaxThreads > 0)
|
||||
addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue());
|
||||
|
||||
// min blocks is an optional argument for CUDALaunchBoundsAttr. If it was
|
||||
// not specified in __launch_bounds__ or if the user specified a 0 value,
|
||||
// we don't have to add a PTX directive.
|
||||
if (Attr->getMinBlocks()) {
|
||||
llvm::APSInt MinBlocks(32);
|
||||
MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext());
|
||||
if (MinBlocks > 0)
|
||||
// Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
|
||||
addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3488,20 +3488,63 @@ bool Sema::CheckRegparmAttr(const AttributeList &Attr, unsigned &numParams) {
|
|||
return false;
|
||||
}
|
||||
|
||||
static void handleLaunchBoundsAttr(Sema &S, Decl *D,
|
||||
const AttributeList &Attr) {
|
||||
uint32_t MaxThreads, MinBlocks = 0;
|
||||
if (!checkUInt32Argument(S, Attr, Attr.getArgAsExpr(0), MaxThreads, 1))
|
||||
return;
|
||||
if (Attr.getNumArgs() > 1 && !checkUInt32Argument(S, Attr,
|
||||
Attr.getArgAsExpr(1),
|
||||
MinBlocks, 2))
|
||||
// Checks whether an argument of launch_bounds attribute is acceptable
|
||||
// May output an error.
|
||||
static bool checkLaunchBoundsArgument(Sema &S, Expr *E,
|
||||
const CUDALaunchBoundsAttr &Attr,
|
||||
const unsigned Idx) {
|
||||
|
||||
if (S.DiagnoseUnexpandedParameterPack(E))
|
||||
return false;
|
||||
|
||||
// Accept template arguments for now as they depend on something else.
|
||||
// We'll get to check them when they eventually get instantiated.
|
||||
if (E->isValueDependent())
|
||||
return true;
|
||||
|
||||
llvm::APSInt I(64);
|
||||
if (!E->isIntegerConstantExpr(I, S.Context)) {
|
||||
S.Diag(E->getExprLoc(), diag::err_attribute_argument_n_type)
|
||||
<< &Attr << Idx << AANT_ArgumentIntegerConstant << E->getSourceRange();
|
||||
return false;
|
||||
}
|
||||
// Make sure we can fit it in 32 bits.
|
||||
if (!I.isIntN(32)) {
|
||||
S.Diag(E->getExprLoc(), diag::err_ice_too_large) << I.toString(10, false)
|
||||
<< 32 << /* Unsigned */ 1;
|
||||
return false;
|
||||
}
|
||||
if (I < 0)
|
||||
S.Diag(E->getExprLoc(), diag::warn_attribute_argument_n_negative)
|
||||
<< &Attr << Idx << E->getSourceRange();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void Sema::AddLaunchBoundsAttr(SourceRange AttrRange, Decl *D, Expr *MaxThreads,
|
||||
Expr *MinBlocks, unsigned SpellingListIndex) {
|
||||
CUDALaunchBoundsAttr TmpAttr(AttrRange, Context, MaxThreads, MinBlocks,
|
||||
SpellingListIndex);
|
||||
|
||||
if (!checkLaunchBoundsArgument(*this, MaxThreads, TmpAttr, 0))
|
||||
return;
|
||||
|
||||
D->addAttr(::new (S.Context)
|
||||
CUDALaunchBoundsAttr(Attr.getRange(), S.Context,
|
||||
MaxThreads, MinBlocks,
|
||||
Attr.getAttributeSpellingListIndex()));
|
||||
if (MinBlocks && !checkLaunchBoundsArgument(*this, MinBlocks, TmpAttr, 1))
|
||||
return;
|
||||
|
||||
D->addAttr(::new (Context) CUDALaunchBoundsAttr(
|
||||
AttrRange, Context, MaxThreads, MinBlocks, SpellingListIndex));
|
||||
}
|
||||
|
||||
static void handleLaunchBoundsAttr(Sema &S, Decl *D,
|
||||
const AttributeList &Attr) {
|
||||
if (!checkAttributeAtLeastNumArgs(S, Attr, 1) ||
|
||||
!checkAttributeAtMostNumArgs(S, Attr, 2))
|
||||
return;
|
||||
|
||||
S.AddLaunchBoundsAttr(Attr.getRange(), D, Attr.getArgAsExpr(0),
|
||||
Attr.getNumArgs() > 1 ? Attr.getArgAsExpr(1) : nullptr,
|
||||
Attr.getAttributeSpellingListIndex());
|
||||
}
|
||||
|
||||
static void handleArgumentWithTypeTagAttr(Sema &S, Decl *D,
|
||||
|
|
|
@ -202,6 +202,31 @@ static void instantiateDependentEnableIfAttr(
|
|||
New->addAttr(EIA);
|
||||
}
|
||||
|
||||
// Constructs and adds to New a new instance of CUDALaunchBoundsAttr using
|
||||
// template A as the base and arguments from TemplateArgs.
|
||||
static void instantiateDependentCUDALaunchBoundsAttr(
|
||||
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
|
||||
const CUDALaunchBoundsAttr &Attr, Decl *New) {
|
||||
// The alignment expression is a constant expression.
|
||||
EnterExpressionEvaluationContext Unevaluated(S, Sema::ConstantEvaluated);
|
||||
|
||||
ExprResult Result = S.SubstExpr(Attr.getMaxThreads(), TemplateArgs);
|
||||
if (Result.isInvalid())
|
||||
return;
|
||||
Expr *MaxThreads = Result.getAs<Expr>();
|
||||
|
||||
Expr *MinBlocks = nullptr;
|
||||
if (Attr.getMinBlocks()) {
|
||||
Result = S.SubstExpr(Attr.getMinBlocks(), TemplateArgs);
|
||||
if (Result.isInvalid())
|
||||
return;
|
||||
MinBlocks = Result.getAs<Expr>();
|
||||
}
|
||||
|
||||
S.AddLaunchBoundsAttr(Attr.getLocation(), New, MaxThreads, MinBlocks,
|
||||
Attr.getSpellingListIndex());
|
||||
}
|
||||
|
||||
void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
|
||||
const Decl *Tmpl, Decl *New,
|
||||
LateInstantiatedAttrVec *LateAttrs,
|
||||
|
@ -233,6 +258,13 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
|
|||
continue;
|
||||
}
|
||||
|
||||
if (const CUDALaunchBoundsAttr *CUDALaunchBounds =
|
||||
dyn_cast<CUDALaunchBoundsAttr>(TmplAttr)) {
|
||||
instantiateDependentCUDALaunchBoundsAttr(*this, TemplateArgs,
|
||||
*CUDALaunchBounds, New);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Existing DLL attribute on the instantiation takes precedence.
|
||||
if (TmplAttr->getKind() == attr::DLLExport ||
|
||||
TmplAttr->getKind() == attr::DLLImport) {
|
||||
|
|
|
@ -28,3 +28,54 @@ Kernel2()
|
|||
}
|
||||
|
||||
// CHECK: !{{[0-9]+}} = !{void ()* @Kernel2, !"maxntidx", i32 256}
|
||||
|
||||
template <int max_threads_per_block>
|
||||
__global__ void
|
||||
__launch_bounds__(max_threads_per_block)
|
||||
Kernel3()
|
||||
{
|
||||
}
|
||||
|
||||
template void Kernel3<MAX_THREADS_PER_BLOCK>();
|
||||
// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel3{{.*}}, !"maxntidx", i32 256}
|
||||
|
||||
template <int max_threads_per_block, int min_blocks_per_mp>
|
||||
__global__ void
|
||||
__launch_bounds__(max_threads_per_block, min_blocks_per_mp)
|
||||
Kernel4()
|
||||
{
|
||||
}
|
||||
template void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
|
||||
|
||||
// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256}
|
||||
// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2}
|
||||
|
||||
const int constint = 100;
|
||||
template <int max_threads_per_block, int min_blocks_per_mp>
|
||||
__global__ void
|
||||
__launch_bounds__(max_threads_per_block + constint,
|
||||
min_blocks_per_mp + max_threads_per_block)
|
||||
Kernel5()
|
||||
{
|
||||
}
|
||||
template void Kernel5<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
|
||||
|
||||
// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356}
|
||||
// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258}
|
||||
|
||||
// Make sure we don't emit negative launch bounds values.
|
||||
__global__ void
|
||||
__launch_bounds__( -MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP )
|
||||
Kernel6()
|
||||
{
|
||||
}
|
||||
// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel6{{.*}}, !"maxntidx",
|
||||
// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel6{{.*}}, !"minctasm",
|
||||
|
||||
__global__ void
|
||||
__launch_bounds__( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP )
|
||||
Kernel7()
|
||||
{
|
||||
}
|
||||
// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel7{{.*}}, !"maxntidx",
|
||||
// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel7{{.*}}, !"minctasm",
|
||||
|
|
|
@ -1,11 +1,49 @@
|
|||
// RUN: %clang_cc1 -fsyntax-only -verify %s
|
||||
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
__launch_bounds__(128, 7) void Test1(void);
|
||||
__launch_bounds__(128) void Test2(void);
|
||||
__launch_bounds__(128, 7) void Test2Args(void);
|
||||
__launch_bounds__(128) void Test1Arg(void);
|
||||
|
||||
__launch_bounds__(1, 2, 3) void Test3(void); // expected-error {{'launch_bounds' attribute takes no more than 2 arguments}}
|
||||
__launch_bounds__() void Test4(void); // expected-error {{'launch_bounds' attribute takes at least 1 argument}}
|
||||
__launch_bounds__(0xffffffff) void TestMaxArg(void);
|
||||
__launch_bounds__(0x100000000) void TestTooBigArg(void); // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
|
||||
__launch_bounds__(0x10000000000000000) void TestWayTooBigArg(void); // expected-error {{integer literal is too large to be represented in any integer type}}
|
||||
|
||||
int Test5 __launch_bounds__(128, 7); // expected-warning {{'launch_bounds' attribute only applies to functions and methods}}
|
||||
__launch_bounds__(-128, 7) void TestNegArg1(void); // expected-warning {{'launch_bounds' attribute parameter 0 is negative and will be ignored}}
|
||||
__launch_bounds__(128, -7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 1 is negative and will be ignored}}
|
||||
|
||||
__launch_bounds__(1, 2, 3) void Test3Args(void); // expected-error {{'launch_bounds' attribute takes no more than 2 arguments}}
|
||||
__launch_bounds__() void TestNoArgs(void); // expected-error {{'launch_bounds' attribute takes at least 1 argument}}
|
||||
|
||||
int TestNoFunction __launch_bounds__(128, 7); // expected-warning {{'launch_bounds' attribute only applies to functions and methods}}
|
||||
|
||||
__launch_bounds__(true) void TestBool(void);
|
||||
__launch_bounds__(128.0) void TestFP(void); // expected-error {{'launch_bounds' attribute requires parameter 0 to be an integer constant}}
|
||||
__launch_bounds__((void*)0) void TestNullptr(void); // expected-error {{'launch_bounds' attribute requires parameter 0 to be an integer constant}}
|
||||
|
||||
int nonconstint = 256;
|
||||
__launch_bounds__(nonconstint) void TestNonConstInt(void); // expected-error {{'launch_bounds' attribute requires parameter 0 to be an integer constant}}
|
||||
|
||||
const int constint = 512;
|
||||
__launch_bounds__(constint) void TestConstInt(void);
|
||||
__launch_bounds__(constint * 2 + 3) void TestConstIntExpr(void);
|
||||
|
||||
template <int a, int b> __launch_bounds__(a, b) void TestTemplate2Args(void) {}
|
||||
template void TestTemplate2Args<128,7>(void);
|
||||
|
||||
template <int a> __launch_bounds__(a) void TestTemplate1Arg(void) {}
|
||||
template void TestTemplate1Arg<128>(void);
|
||||
|
||||
template <class a>
|
||||
__launch_bounds__(a) void TestTemplate1ArgClass(void) {} // expected-error {{'a' does not refer to a value}}
|
||||
// expected-note@-2 {{declared here}}
|
||||
|
||||
template <int a, int b, int c>
|
||||
__launch_bounds__(a + b, c + constint) void TestTemplateExpr(void) {}
|
||||
template void TestTemplateExpr<128+constint, 3, 7>(void);
|
||||
|
||||
template <int... Args>
|
||||
__launch_bounds__(Args) void TestTemplateVariadicArgs(void) {} // expected-error {{expression contains unexpanded parameter pack 'Args'}}
|
||||
|
||||
template <int... Args>
|
||||
__launch_bounds__(1, Args) void TestTemplateVariadicArgs2(void) {} // expected-error {{expression contains unexpanded parameter pack 'Args'}}
|
||||
|
|
Loading…
Reference in New Issue