forked from OSchip/llvm-project
[AMDGPU] Flag new raw/struct atomic ops as source of divergence
Differential Revision: https://reviews.llvm.org/D60731 Change-Id: I821d93dec8b9cdd247b8172d92fb5e15340a9e7d llvm-svn: 358579
This commit is contained in:
parent
272f15abc3
commit
59e8bd3093
|
@ -71,6 +71,28 @@ def : SourceOfDivergence<int_amdgcn_buffer_atomic_and>;
|
|||
def : SourceOfDivergence<int_amdgcn_buffer_atomic_or>;
|
||||
def : SourceOfDivergence<int_amdgcn_buffer_atomic_xor>;
|
||||
def : SourceOfDivergence<int_amdgcn_buffer_atomic_cmpswap>;
|
||||
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_swap>;
|
||||
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_add>;
|
||||
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_sub>;
|
||||
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_smin>;
|
||||
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_umin>;
|
||||
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_smax>;
|
||||
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_umax>;
|
||||
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_and>;
|
||||
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_or>;
|
||||
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_xor>;
|
||||
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_cmpswap>;
|
||||
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_swap>;
|
||||
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_add>;
|
||||
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_sub>;
|
||||
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_smin>;
|
||||
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_umin>;
|
||||
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_smax>;
|
||||
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_umax>;
|
||||
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_and>;
|
||||
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_or>;
|
||||
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_xor>;
|
||||
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_cmpswap>;
|
||||
def : SourceOfDivergence<int_amdgcn_ps_live>;
|
||||
def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
|
||||
def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
|
||||
|
|
|
@ -88,6 +88,182 @@ main_body:
|
|||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(
|
||||
define float @raw_buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(
|
||||
define float @raw_buffer_atomic_add(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.sub.i32(
|
||||
define float @raw_buffer_atomic_sub(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.sub.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.smin.i32(
|
||||
define float @raw_buffer_atomic_smin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.smin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.umin.i32(
|
||||
define float @raw_buffer_atomic_umin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.umin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.smax.i32(
|
||||
define float @raw_buffer_atomic_smax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.smax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.umax.i32(
|
||||
define float @raw_buffer_atomic_umax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.umax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.and.i32(
|
||||
define float @raw_buffer_atomic_and(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.and.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.or.i32(
|
||||
define float @raw_buffer_atomic_or(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.or.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.xor.i32(
|
||||
define float @raw_buffer_atomic_xor(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.xor.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(
|
||||
define float @raw_buffer_atomic_cmpswap(<4 x i32> inreg %rsrc, i32 inreg %data, i32 inreg %cmp) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(
|
||||
define float @struct_buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(
|
||||
define float @struct_buffer_atomic_add(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(
|
||||
define float @struct_buffer_atomic_sub(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.smin.i32(
|
||||
define float @struct_buffer_atomic_smin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.smin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.umin.i32(
|
||||
define float @struct_buffer_atomic_umin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.umin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.smax.i32(
|
||||
define float @struct_buffer_atomic_smax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.smax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.umax.i32(
|
||||
define float @struct_buffer_atomic_umax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.umax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.and.i32(
|
||||
define float @struct_buffer_atomic_and(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.and.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.or.i32(
|
||||
define float @struct_buffer_atomic_or(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.or.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(
|
||||
define float @struct_buffer_atomic_xor(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(
|
||||
define float @struct_buffer_atomic_cmpswap(<4 x i32> inreg %rsrc, i32 inreg %data, i32 inreg %cmp) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i1) #0
|
||||
declare i32 @llvm.amdgcn.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i1) #0
|
||||
declare i32 @llvm.amdgcn.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i1) #0
|
||||
|
@ -100,4 +276,28 @@ declare i32 @llvm.amdgcn.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i1) #0
|
|||
declare i32 @llvm.amdgcn.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i1) #0
|
||||
declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #0
|
||||
|
||||
declare i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.raw.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.raw.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.raw.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.raw.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.raw.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.raw.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.raw.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.raw.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32) #0
|
||||
|
||||
declare i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.struct.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.struct.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.struct.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.struct.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.struct.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.struct.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32, i32) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
|
Loading…
Reference in New Issue