forked from OSchip/llvm-project
AMDGPU: Don't require structured CFG
The structured CFG is just an aid to inserting exec mask modification instructions; once that is done, we don't really need it anymore. We also do not analyze blocks with terminators that modify exec, so this should only impact true branches. llvm-svn: 288744
This commit is contained in:
parent
9642b36e91
commit
ad55ee5869
|
@ -162,7 +162,6 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
|
|||
FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
|
||||
TLOF(createTLOF(getTargetTriple())),
|
||||
IntrinsicInfo() {
|
||||
setRequiresStructuredCFG(true);
|
||||
initAsmInfo();
|
||||
}
|
||||
|
||||
|
@ -191,7 +190,9 @@ R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
|
|||
TargetOptions Options,
|
||||
Optional<Reloc::Model> RM,
|
||||
CodeModel::Model CM, CodeGenOpt::Level OL)
|
||||
: AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
|
||||
: AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
|
||||
setRequiresStructuredCFG(true);
|
||||
}
|
||||
|
||||
const R600Subtarget *R600TargetMachine::getSubtargetImpl(
|
||||
const Function &F) const {
|
||||
|
|
|
@ -8,13 +8,15 @@
|
|||
; GCNNOOPT: v_writelane_b32
|
||||
; GCN: s_cbranch_scc1 [[END:BB[0-9]+_[0-9]+]]
|
||||
|
||||
|
||||
; GCN: ; BB#1
|
||||
; GCNNOOPT: v_readlane_b32
|
||||
; GCNNOOPT: v_readlane_b32
|
||||
; GCN: buffer_store_dword
|
||||
; GCN: s_endpgm
|
||||
; GCNOPT-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
||||
; TODO: This waitcnt can be eliminated
|
||||
|
||||
; GCN: {{^}}[[END]]
|
||||
; GCN: {{^}}[[END]]:
|
||||
; GCN: s_endpgm
|
||||
define void @test_branch(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) #0 {
|
||||
%cmp = icmp ne i32 %val, 0
|
||||
|
@ -35,9 +37,10 @@ end:
|
|||
; GCN: s_cbranch_vccnz [[END:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: buffer_store_dword
|
||||
; GCN: s_endpgm
|
||||
; GCNOPT-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
||||
; TODO: This waitcnt can be eliminated
|
||||
|
||||
; GCN: {{^}}[[END]]
|
||||
; GCN: {{^}}[[END]]:
|
||||
; GCN: s_endpgm
|
||||
define void @test_brcc_i1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i1 %val) #0 {
|
||||
%cmp0 = icmp ne i1 %val, 0
|
||||
|
|
|
@ -12,9 +12,10 @@
|
|||
; GCN: s_cbranch_vccnz
|
||||
|
||||
; GCN: one{{$}}
|
||||
; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
|
||||
; GCN: buffer_store_short v[[A_F16]]
|
||||
; GCN: s_endpgm
|
||||
; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
|
||||
; SI: s_branch
|
||||
; VI: buffer_store_short
|
||||
; VI: s_endpgm
|
||||
|
||||
; GCN: two{{$}}
|
||||
; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[B_F32]]
|
||||
|
@ -47,17 +48,19 @@ two:
|
|||
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
|
||||
; SI: v_cmp_ngt_f32_e32 vcc, v[[B_F32]], v[[A_F32]]
|
||||
; VI: v_cmp_nle_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
|
||||
; GCN: s_cbranch_vccnz
|
||||
; SI: s_cbranch_vccz
|
||||
; VI: s_cbranch_vccnz
|
||||
|
||||
; GCN: one{{$}}
|
||||
; VI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x3800{{$}}
|
||||
; GCN: buffer_store_short v[[A_F16]]
|
||||
; GCN: s_endpgm
|
||||
; VI: one{{$}}
|
||||
; VI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x380{{0|1}}{{$}}
|
||||
|
||||
; GCN: two{{$}}
|
||||
; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[B_F32]]
|
||||
; GCN: buffer_store_short v[[B_F16]]
|
||||
; GCN: s_endpgm
|
||||
|
||||
; SI: one{{$}}
|
||||
; SI: buffer_store_short v[[A_F16]]
|
||||
; SI: s_endpgm
|
||||
|
||||
define void @br_cc_f16_imm_a(
|
||||
half addrspace(1)* %r,
|
||||
half addrspace(1)* %b) {
|
||||
|
@ -87,8 +90,6 @@ two:
|
|||
|
||||
; GCN: one{{$}}
|
||||
; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
|
||||
; GCN: buffer_store_short v[[A_F16]]
|
||||
; GCN: s_endpgm
|
||||
|
||||
; GCN: two{{$}}
|
||||
; VI: v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x3800{{$}}
|
||||
|
|
|
@ -475,14 +475,13 @@ ret:
|
|||
|
||||
; GCN-LABEL: {{^}}long_branch_hang:
|
||||
; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 6
|
||||
; GCN-NEXT: s_cbranch_scc1 [[LONG_BR_0:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: s_branch [[SHORTB:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: s_cbranch_scc0 [[LONG_BR_0:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: BB{{[0-9]+_[0-9]+}}:
|
||||
|
||||
; GCN-NEXT: [[LONG_BR_0]]:
|
||||
; GCN: s_add_u32 vcc_lo, vcc_lo, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-(
|
||||
; GCN: s_setpc_b64
|
||||
|
||||
; GCN: [[SHORTB]]:
|
||||
; GCN-NEXT: [[LONG_BR_0]]:
|
||||
; GCN-DAG: v_cmp_lt_i32
|
||||
; GCN-DAG: v_cmp_gt_i32
|
||||
; GCN: s_cbranch_vccnz
|
||||
|
@ -492,7 +491,6 @@ ret:
|
|||
|
||||
; GCN: [[LONG_BR_DEST0]]
|
||||
; GCN: v_cmp_ne_u32_e32
|
||||
; GCN-NEXT: ; implicit-def
|
||||
; GCN-NEXT: s_cbranch_vccz
|
||||
; GCN: s_setpc_b64
|
||||
|
||||
|
|
|
@ -506,11 +506,13 @@ bb:
|
|||
bb1:
|
||||
%tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
|
||||
%tmp3 = extractelement <4 x float> %tmp2, i32 undef
|
||||
call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp2) #0 ; Prevent block optimize out
|
||||
br label %bb7
|
||||
|
||||
bb4:
|
||||
%tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
|
||||
%tmp6 = extractelement <4 x float> %tmp5, i32 undef
|
||||
call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp5) #0 ; Prevent block optimize out
|
||||
br label %bb7
|
||||
|
||||
bb7:
|
||||
|
@ -554,11 +556,13 @@ bb:
|
|||
bb1: ; preds = %bb
|
||||
%tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
|
||||
%tmp3 = insertelement <4 x float> %tmp2, float %val0, i32 undef
|
||||
call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp3) #0 ; Prevent block optimize out
|
||||
br label %bb7
|
||||
|
||||
bb4: ; preds = %bb
|
||||
%tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
|
||||
%tmp6 = insertelement <4 x float> %tmp5, float %val0, i32 undef
|
||||
call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp6) #0 ; Prevent block optimize out
|
||||
br label %bb7
|
||||
|
||||
bb7: ; preds = %bb4, %bb1
|
||||
|
@ -745,6 +749,8 @@ bb8: ; preds = %bb2
|
|||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
declare void @llvm.amdgcn.s.barrier() #2
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
attributes #2 = { nounwind convergent }
|
||||
|
|
|
@ -1,8 +1,5 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; FIXME: Enabling critical edge splitting will fix this.
|
||||
; XFAIL: *
|
||||
|
||||
; Make sure that m0 is not reinitialized in the loop.
|
||||
|
||||
; GCN-LABEL: {{^}}copy_local_to_global_loop_m0_init:
|
||||
|
@ -12,7 +9,9 @@
|
|||
; GCN: s_mov_b32 m0, -1
|
||||
|
||||
; GCN: BB0_2:
|
||||
; GCN-NOT: m0
|
||||
; GCN: ds_read_b32
|
||||
; GCN-NOT: m0
|
||||
; GCN: buffer_store_dword
|
||||
|
||||
; GCN: s_cbranch_scc0 BB0_2
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
;
|
||||
;
|
||||
; Most SALU instructions ignore control flow, so we need to make sure
|
||||
|
@ -9,13 +9,54 @@
|
|||
; about instructions in different blocks overwriting each other.
|
||||
; SI-LABEL: {{^}}sgpr_if_else_salu_br:
|
||||
; SI: s_add
|
||||
; SI: s_add
|
||||
; SI: s_branch
|
||||
|
||||
; SI: s_sub
|
||||
|
||||
define void @sgpr_if_else_salu_br(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
|
||||
entry:
|
||||
%0 = icmp eq i32 %a, 0
|
||||
br i1 %0, label %if, label %else
|
||||
|
||||
if:
|
||||
%1 = sub i32 %b, %c
|
||||
br label %endif
|
||||
|
||||
else:
|
||||
%2 = add i32 %d, %e
|
||||
br label %endif
|
||||
|
||||
endif:
|
||||
%3 = phi i32 [%1, %if], [%2, %else]
|
||||
%4 = add i32 %3, %a
|
||||
store i32 %4, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}sgpr_if_else_salu_br_opt:
|
||||
; SI: s_cmp_lg_u32
|
||||
; SI: s_cbranch_scc0 [[IF:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; SI: ; BB#1: ; %else
|
||||
; SI: s_load_dword [[LOAD0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xe
|
||||
; SI: s_load_dword [[LOAD1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xf
|
||||
; SI-NOT: add
|
||||
; SI: s_branch [[ENDIF:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; SI: [[IF]]: ; %if
|
||||
; SI: s_load_dword [[LOAD0]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; SI: s_load_dword [[LOAD1]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
|
||||
; SI-NOT: add
|
||||
|
||||
; SI: [[ENDIF]]: ; %endif
|
||||
; SI: s_add_i32 s{{[0-9]+}}, [[LOAD0]], [[LOAD1]]
|
||||
; SI: buffer_store_dword
|
||||
; SI-NEXT: s_endpgm
|
||||
define void @sgpr_if_else_salu_br_opt(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
|
||||
entry:
|
||||
%0 = icmp eq i32 %a, 0
|
||||
br i1 %0, label %if, label %else
|
||||
|
||||
if:
|
||||
%1 = add i32 %b, %c
|
||||
br label %endif
|
||||
|
@ -67,7 +108,7 @@ endif:
|
|||
; SI: v_cmp_gt_i32_e32 [[CMP_IF:vcc]], 0, [[AVAL]]
|
||||
; SI: v_cndmask_b32_e64 [[V_CMP:v[0-9]+]], 0, -1, [[CMP_IF]]
|
||||
|
||||
; SI: BB2_2:
|
||||
; SI: BB{{[0-9]+}}_2:
|
||||
; SI: buffer_load_dword [[AVAL:v[0-9]+]]
|
||||
; SI: v_cmp_eq_u32_e32 [[CMP_ELSE:vcc]], 0, [[AVAL]]
|
||||
; SI: v_cndmask_b32_e64 [[V_CMP]], 0, -1, [[CMP_ELSE]]
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
; This test checks that no VGPR to SGPR copies are created by the register
|
||||
; allocator.
|
||||
|
@ -223,8 +223,15 @@ declare i32 @llvm.SI.packf16(float, float) #1
|
|||
; an assertion failure.
|
||||
|
||||
; CHECK-LABEL: {{^}}sample_v3:
|
||||
; CHECK: image_sample
|
||||
; CHECK: image_sample
|
||||
; CHECK: v_mov_b32_e32 v[[SAMPLE_LO:[0-9]+]], 11
|
||||
; CHECK: v_mov_b32_e32 v[[SAMPLE_HI:[0-9]+]], 13
|
||||
; CHECK: s_branch
|
||||
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[SAMPLE_LO:[0-9]+]], 5
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[SAMPLE_HI:[0-9]+]], 7
|
||||
|
||||
; CHECK: BB{{[0-9]+_[0-9]+}}:
|
||||
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[SAMPLE_LO]]:[[SAMPLE_HI]]{{\]}}
|
||||
; CHECK: exp
|
||||
; CHECK: s_endpgm
|
||||
define amdgpu_ps void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
|
||||
|
@ -241,14 +248,14 @@ entry:
|
|||
br i1 %tmp27, label %if, label %else
|
||||
|
||||
if: ; preds = %entry
|
||||
%val.if = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> zeroinitializer, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
|
||||
%val.if = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 11, i32 13>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
|
||||
%val.if.0 = extractelement <4 x float> %val.if, i32 0
|
||||
%val.if.1 = extractelement <4 x float> %val.if, i32 1
|
||||
%val.if.2 = extractelement <4 x float> %val.if, i32 2
|
||||
br label %endif
|
||||
|
||||
else: ; preds = %entry
|
||||
%val.else = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1, i32 0>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
|
||||
%val.else = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 5, i32 7>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
|
||||
%val.else.0 = extractelement <4 x float> %val.else, i32 0
|
||||
%val.else.1 = extractelement <4 x float> %val.else, i32 1
|
||||
%val.else.2 = extractelement <4 x float> %val.else, i32 2
|
||||
|
@ -317,9 +324,15 @@ ENDIF69: ; preds = %LOOP68
|
|||
|
||||
; This test checks that image_sample resource descriptors aren't loaded into
|
||||
; vgprs. The verifier will fail if this happens.
|
||||
; CHECK-LABEL:{{^}}sample_rsrc:
|
||||
; CHECK: image_sample
|
||||
; CHECK: image_sample
|
||||
; CHECK-LABEL:{{^}}sample_rsrc
|
||||
|
||||
; CHECK: s_cmp_eq_u32
|
||||
; CHECK: s_cbranch_scc0 [[END:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; CHECK: v_add_i32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}}
|
||||
|
||||
; [[END]]:
|
||||
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
|
||||
; CHECK: s_endpgm
|
||||
define amdgpu_ps void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
|
||||
bb:
|
||||
|
|
|
@ -308,10 +308,8 @@ end:
|
|||
; CHECK: s_mov_b64 exec, 0
|
||||
|
||||
; CHECK: [[SKIPKILL]]:
|
||||
; CHECK: v_cmp_nge_f32
|
||||
; CHECK-NEXT: s_cbranch_vccz [[UNREACHABLE:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; CHECK: [[UNREACHABLE]]:
|
||||
; CHECK: v_cmp_nge_f32_e32 vcc
|
||||
; CHECK-NEXT: BB#3: ; %bb5
|
||||
; CHECK-NEXT: .Lfunc_end{{[0-9]+}}
|
||||
define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 {
|
||||
bb:
|
||||
|
|
|
@ -197,15 +197,15 @@ if.end: ; preds = %if.else, %if.then
|
|||
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
|
||||
; GCN-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
|
||||
; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
|
||||
; GCN: buffer_store_dword [[TWO]]
|
||||
; GCN: v_mov_b32_e32 [[IMM_REG:v[0-9]+]], 2
|
||||
; GCN: s_branch [[ENDIF_LABEL:[0-9_A-Za-z]+]]
|
||||
|
||||
; GCN: [[IF_LABEL]]:
|
||||
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
|
||||
; GCN: buffer_store_dword [[ONE]]
|
||||
; GCN-NEXT: v_mov_b32_e32 [[IMM_REG]], 1
|
||||
|
||||
; GCN-NEXT: [[ENDIF_LABEL]]:
|
||||
; GCN: buffer_store_dword [[IMM_REG]]
|
||||
|
||||
; GCN: [[ENDIF_LABEL]]:
|
||||
; GCN: v_mov_b32_e32 [[THREE:v[0-9]+]], 3
|
||||
; GCN: buffer_store_dword [[THREE]]
|
||||
; GCN: s_endpgm
|
||||
|
|
Loading…
Reference in New Issue