forked from OSchip/llvm-project
AMDGPU: Fix atomic_inc/atomic_dec + ds_swizzle not being divergent
llvm-svn: 293504
This commit is contained in:
parent
e48f60aec8
commit
41c1499504
|
@ -249,6 +249,8 @@ static bool isIntrinsicSourceOfDivergence(const TargetIntrinsicInfo *TII,
|
|||
case Intrinsic::r600_read_tidig_x:
|
||||
case Intrinsic::r600_read_tidig_y:
|
||||
case Intrinsic::r600_read_tidig_z:
|
||||
case Intrinsic::amdgcn_atomic_inc:
|
||||
case Intrinsic::amdgcn_atomic_dec:
|
||||
case Intrinsic::amdgcn_image_atomic_swap:
|
||||
case Intrinsic::amdgcn_image_atomic_add:
|
||||
case Intrinsic::amdgcn_image_atomic_sub:
|
||||
|
@ -274,6 +276,7 @@ static bool isIntrinsicSourceOfDivergence(const TargetIntrinsicInfo *TII,
|
|||
case Intrinsic::amdgcn_buffer_atomic_xor:
|
||||
case Intrinsic::amdgcn_buffer_atomic_cmpswap:
|
||||
case Intrinsic::amdgcn_ps_live:
|
||||
case Intrinsic::amdgcn_ds_swizzle:
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -12,4 +12,34 @@ define {i32, i1} @test2(i32* %ptr, i32 %cmp, i32 %new) {
|
|||
ret {i32, i1} %orig
|
||||
}
|
||||
|
||||
; Test: calls the i32 llvm.amdgcn.atomic.inc intrinsic on an addrspace(1) pointer
; and returns its result. The CHECK line below requires the tool output to report
; that call (%ret) as DIVERGENT.
; NOTE(review): the RUN line for this test file is outside the visible hunk.
; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val)
|
||||
define i32 @test_atomic_inc_i32(i32 addrspace(1)* %ptr, i32 %val) #0 {
|
||||
%ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; Test: i64 variant of the atomic.inc check. Calls llvm.amdgcn.atomic.inc with an
; i64 addrspace(1) pointer and an i64 operand, then returns the result. The CHECK
; line below requires that call (%ret) to be reported as DIVERGENT.
; CHECK: DIVERGENT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val)
|
||||
define i64 @test_atomic_inc_i64(i64 addrspace(1)* %ptr, i64 %val) #0 {
|
||||
%ret = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val)
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
; Test: calls the i32 llvm.amdgcn.atomic.dec intrinsic on an addrspace(1) pointer
; and returns its result. The CHECK line below requires that call (%ret) to be
; reported as DIVERGENT.
; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val)
|
||||
define i32 @test_atomic_dec_i32(i32 addrspace(1)* %ptr, i32 %val) #0 {
|
||||
%ret = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; Test: i64 variant of the atomic.dec check. Calls llvm.amdgcn.atomic.dec with an
; i64 addrspace(1) pointer and an i64 operand, then returns the result. The CHECK
; line below requires that call (%ret) to be reported as DIVERGENT.
; CHECK: DIVERGENT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val)
|
||||
define i64 @test_atomic_dec_i64(i64 addrspace(1)* %ptr, i64 %val) #0 {
|
||||
%ret = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val)
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32) #1
|
||||
declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64) #1
|
||||
declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32) #1
|
||||
declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind argmemonly }
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s
|
||||
|
||||
; Test: calls llvm.amdgcn.ds.swizzle on %src with the immediate operand 100 and
; stores the result through %out. The CHECK line below requires the output of the
; RUN command above (opt -analyze -divergence, amdgcn triple) to report the
; swizzle call as DIVERGENT.
; NOTE(review): the meaning of the immediate 100 is not shown in this file.
; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
|
||||
define void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) #0 {
|
||||
%swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
|
||||
|
||||
store i32 %swizzle, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #1
|
||||
|
||||
attributes #0 = { nounwind convergent }
|
||||
attributes #1 = { nounwind readnone convergent }
|
Loading…
Reference in New Issue