forked from OSchip/llvm-project
[CUDA] Conservatively mark inline asm as convergent.
Summary: This is particularly important because a some convergent CUDA intrinsics (e.g. __shfl_down) are implemented in terms of inline asm. Reviewers: tra Subscribers: cfe-commits Differential Revision: http://reviews.llvm.org/D20836 llvm-svn: 271336
This commit is contained in:
parent
0aeb313e79
commit
f179364341
|
@ -2054,6 +2054,14 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
|
|||
llvm::ConstantAsMetadata::get(Loc)));
|
||||
}
|
||||
|
||||
if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
|
||||
// Conservatively, mark all inline asm blocks in CUDA as convergent
|
||||
// (meaning, they may call an intrinsically convergent op, such as bar.sync,
|
||||
// and so can't have certain optimizations applied around them).
|
||||
Result->addAttribute(llvm::AttributeSet::FunctionIndex,
|
||||
llvm::Attribute::Convergent);
|
||||
}
|
||||
|
||||
// Extract all of the register value results from the asm.
|
||||
std::vector<llvm::Value*> RegResults;
|
||||
if (ResultRegTypes.size() == 1) {
|
||||
|
|
|
@ -25,6 +25,11 @@ __host__ __device__ void baz();
|
|||
__host__ __device__ void bar() {
|
||||
// DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]]
|
||||
baz();
|
||||
// DEVICE: call i32 asm "trap;", "=l"() [[ASM_ATTR:#[0-9]+]]
|
||||
int x;
|
||||
asm ("trap;" : "=l"(x));
|
||||
// DEVICE: call void asm sideeffect "trap;", ""() [[ASM_ATTR:#[0-9]+]]
|
||||
asm volatile ("trap;");
|
||||
}
|
||||
|
||||
// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
|
||||
|
@ -32,6 +37,7 @@ __host__ __device__ void bar() {
|
|||
// DEVICE-SAME: convergent
|
||||
// DEVICE-SAME: }
|
||||
// DEVICE: attributes [[CALL_ATTR]] = { convergent }
|
||||
// DEVICE: attributes [[ASM_ATTR]] = { convergent
|
||||
|
||||
// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
|
||||
// HOST: attributes [[BAZ_ATTR]] = {
|
||||
|
|
Loading…
Reference in New Issue