forked from OSchip/llvm-project
46 lines
1005 B
LLVM
46 lines
1005 B
LLVM
; RUN: llc -O2 -tail-dup-size=100 -enable-tail-merge=0 < %s | FileCheck %s
|
|
target triple = "nvptx64-nvidia-cuda"
|
|
|
|
declare void @foo()
|
|
declare void @llvm.nvvm.barrier0()
|
|
|
|
; syncthreads shouldn't be duplicated.
|
|
; CHECK: .func call_syncthreads
|
|
; CHECK: bar.sync
|
|
; CHECK-NOT: bar.sync
|
|
define void @call_syncthreads(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind {
|
|
br i1 %cond, label %L1, label %L2
|
|
br i1 %cond2, label %Ret, label %L1
|
|
Ret:
|
|
ret void
|
|
L1:
|
|
store i32 0, i32* %a
|
|
br label %L42
|
|
L2:
|
|
store i32 1, i32* %a
|
|
br label %L42
|
|
L42:
|
|
call void @llvm.nvvm.barrier0()
|
|
br label %Ret
|
|
}
|
|
|
|
; Check that call_syncthreads really does trigger tail duplication.
|
|
; CHECK: .func call_foo
|
|
; CHECK: call
|
|
; CHECK: call
|
|
define void @call_foo(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind {
|
|
br i1 %cond, label %L1, label %L2
|
|
br i1 %cond2, label %Ret, label %L1
|
|
Ret:
|
|
ret void
|
|
L1:
|
|
store i32 0, i32* %a
|
|
br label %L42
|
|
L2:
|
|
store i32 1, i32* %a
|
|
br label %L42
|
|
L42:
|
|
call void @foo()
|
|
br label %Ret
|
|
}
|