[CUDA] Conservatively mark inline asm as convergent.

Summary:
This is particularly important because a some convergent CUDA intrinsics
(e.g.  __shfl_down) are implemented in terms of inline asm.

Reviewers: tra

Subscribers: cfe-commits

Differential Revision: http://reviews.llvm.org/D20836

llvm-svn: 271336
This commit is contained in:
Justin Lebar 2016-05-31 21:27:13 +00:00
parent 0aeb313e79
commit f179364341
2 changed files with 14 additions and 0 deletions

View File

@ -2054,6 +2054,14 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
llvm::ConstantAsMetadata::get(Loc)));
}
if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
// Conservatively, mark all inline asm blocks in CUDA as convergent
// (meaning, they may call an intrinsically convergent op, such as bar.sync,
// and so can't have certain optimizations applied around them).
Result->addAttribute(llvm::AttributeSet::FunctionIndex,
llvm::Attribute::Convergent);
}
// Extract all of the register value results from the asm.
std::vector<llvm::Value*> RegResults;
if (ResultRegTypes.size() == 1) {

View File

@ -25,6 +25,11 @@ __host__ __device__ void baz();
__host__ __device__ void bar() {
// DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]]
baz();
// DEVICE: call i32 asm "trap;", "=l"() [[ASM_ATTR:#[0-9]+]]
int x;
asm ("trap;" : "=l"(x));
// DEVICE: call void asm sideeffect "trap;", ""() [[ASM_ATTR:#[0-9]+]]
asm volatile ("trap;");
}
// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
@ -32,6 +37,7 @@ __host__ __device__ void bar() {
// DEVICE-SAME: convergent
// DEVICE-SAME: }
// DEVICE: attributes [[CALL_ATTR]] = { convergent }
// DEVICE: attributes [[ASM_ATTR]] = { convergent
// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
// HOST: attributes [[BAZ_ATTR]] = {