[AMDGPU] Enable structurizer workarounds by default
Reviewed By: nhaehnle

Differential Revision: https://reviews.llvm.org/D81211
parent 1b6602275d
commit d8f651d3e8
@@ -195,7 +195,7 @@ static cl::opt<bool> EnableScalarIRPasses(
 
 static cl::opt<bool> EnableStructurizerWorkarounds(
     "amdgpu-enable-structurizer-workarounds",
-    cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(false),
+    cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
     cl::Hidden);
 
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
@@ -21,7 +21,6 @@ define amdgpu_kernel void @infinite_loop(i32 addrspace(1)* %out) {
; IR: loop:
; IR-NEXT: store volatile i32 999, i32 addrspace(1)* [[OUT:%.*]], align 4
; IR-NEXT: br label [[LOOP]]
;
entry:
br label %loop
@@ -59,7 +58,6 @@ define amdgpu_kernel void @infinite_loop_ret(i32 addrspace(1)* %out) {
; IR-NEXT: br i1 true, label [[LOOP]], label [[UNIFIEDRETURNBLOCK]]
; IR: UnifiedReturnBlock:
; IR-NEXT: ret void
;
entry:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%cond = icmp eq i32 %tmp, 1
@@ -119,7 +117,6 @@ define amdgpu_kernel void @infinite_loops(i32 addrspace(1)* %out) {
; IR-NEXT: br i1 true, label [[LOOP2]], label [[DUMMYRETURNBLOCK]]
; IR: DummyReturnBlock:
; IR-NEXT: ret void
;
entry:
br i1 undef, label %loop1, label %loop2
@@ -140,33 +137,29 @@ define amdgpu_kernel void @infinite_loop_nest_ret(i32 addrspace(1)* %out) {
; SI-NEXT: s_cbranch_execz BB3_5
; SI-NEXT: ; %bb.1: ; %outer_loop.preheader
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
; SI-NEXT: v_cmp_ne_u32_e64 s[0:1], 3, v0
; SI-NEXT: s_mov_b64 s[2:3], 0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: BB3_2: ; %outer_loop
; SI-NEXT: ; =>This Loop Header: Depth=1
; SI-NEXT: ; Child Loop BB3_3 Depth 2
; SI-NEXT: s_and_b64 s[8:9], exec, vcc
; SI-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3]
; SI-NEXT: s_mov_b64 s[8:9], 0
; SI-NEXT: s_mov_b64 s[2:3], 0
; SI-NEXT: BB3_3: ; %inner_loop
; SI-NEXT: ; Parent Loop BB3_2 Depth=1
; SI-NEXT: ; => This Inner Loop Header: Depth=2
; SI-NEXT: s_and_b64 s[10:11], exec, s[0:1]
; SI-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9]
; SI-NEXT: s_and_b64 s[8:9], exec, s[0:1]
; SI-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3]
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_andn2_b64 exec, exec, s[8:9]
; SI-NEXT: s_cbranch_execnz BB3_3
; SI-NEXT: ; %bb.4: ; %Flow
; SI-NEXT: ; in Loop: Header=BB3_2 Depth=1
; SI-NEXT: s_or_b64 exec, exec, s[8:9]
; SI-NEXT: s_andn2_b64 exec, exec, s[2:3]
; SI-NEXT: s_cbranch_execnz BB3_2
; SI-NEXT: s_cbranch_execnz BB3_3
; SI-NEXT: ; %bb.4: ; %loop.exit.guard
; SI-NEXT: ; in Loop: Header=BB3_2 Depth=1
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
; SI-NEXT: s_and_b64 vcc, exec, 0
; SI-NEXT: s_cbranch_vccz BB3_2
; SI-NEXT: BB3_5: ; %UnifiedReturnBlock
; SI-NEXT: s_endpgm
; IR-LABEL: @infinite_loop_nest_ret(
@@ -184,7 +177,6 @@ define amdgpu_kernel void @infinite_loop_nest_ret(i32 addrspace(1)* %out) {
; IR-NEXT: br i1 [[COND3]], label [[INNER_LOOP]], label [[OUTER_LOOP]]
; IR: UnifiedReturnBlock:
; IR-NEXT: ret void
;
entry:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%cond1 = icmp eq i32 %tmp, 1
@@ -46,52 +46,47 @@ define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) {
;
; GCN-LABEL: multi_else_break:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[2:3], 0
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_branch BB0_2
; GCN-NEXT: BB0_1: ; %Flow2
; GCN-NEXT: BB0_1: ; %loop.exit.guard
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: s_and_b64 s[0:1], exec, s[8:9]
; GCN-NEXT: s_or_b64 s[2:3], s[0:1], s[2:3]
; GCN-NEXT: s_andn2_b64 exec, exec, s[2:3]
; GCN-NEXT: s_and_b64 s[2:3], exec, s[2:3]
; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execz BB0_6
; GCN-NEXT: BB0_2: ; %LOOP.outer
; GCN-NEXT: ; =>This Loop Header: Depth=1
; GCN-NEXT: ; Child Loop BB0_4 Depth 2
; GCN-NEXT: ; implicit-def: $sgpr6_sgpr7
; GCN-NEXT: ; implicit-def: $sgpr8_sgpr9
; GCN-NEXT: ; implicit-def: $sgpr2_sgpr3
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: v_mov_b32_e32 v1, v0
; GCN-NEXT: s_branch BB0_4
; GCN-NEXT: BB0_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB0_4 Depth=2
; GCN-NEXT: s_or_b64 exec, exec, s[10:11]
; GCN-NEXT: s_and_b64 s[0:1], exec, s[6:7]
; GCN-NEXT: s_or_b64 s[4:5], s[0:1], s[4:5]
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
; GCN-NEXT: s_and_b64 s[8:9], exec, s[6:7]
; GCN-NEXT: s_or_b64 s[4:5], s[8:9], s[4:5]
; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5]
; GCN-NEXT: s_cbranch_execz BB0_1
; GCN-NEXT: BB0_4: ; %LOOP
; GCN-NEXT: ; Parent Loop BB0_2 Depth=1
; GCN-NEXT: ; => This Inner Loop Header: Depth=2
; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: v_add_i32_e32 v1, vcc, 1, v2
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, v2, v4
; GCN-NEXT: s_or_b64 s[8:9], s[8:9], exec
; GCN-NEXT: v_mov_b32_e32 v1, v0
; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v1
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, v1, v4
; GCN-NEXT: s_or_b64 s[2:3], s[2:3], exec
; GCN-NEXT: s_or_b64 s[6:7], s[6:7], exec
; GCN-NEXT: s_and_saveexec_b64 s[10:11], vcc
; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GCN-NEXT: s_cbranch_execz BB0_3
; GCN-NEXT: ; %bb.5: ; %ENDIF
; GCN-NEXT: ; in Loop: Header=BB0_4 Depth=2
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v5, v1
; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], v5, v1
; GCN-NEXT: s_andn2_b64 s[8:9], s[8:9], exec
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, v5, v0
; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; GCN-NEXT: s_andn2_b64 s[6:7], s[6:7], exec
; GCN-NEXT: s_and_b64 s[12:13], vcc, exec
; GCN-NEXT: s_and_b64 s[0:1], s[0:1], exec
; GCN-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13]
; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, v1
; GCN-NEXT: s_and_b64 s[10:11], vcc, exec
; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11]
; GCN-NEXT: s_branch BB0_3
; GCN-NEXT: BB0_6: ; %IF
; GCN-NEXT: s_endpgm
@@ -204,7 +199,10 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 1, v1
; GCN-NEXT: s_mov_b64 s[6:7], -1
; GCN-NEXT: s_and_b64 vcc, exec, vcc
; GCN-NEXT: ; implicit-def: $sgpr8_sgpr9
; GCN-NEXT: s_mov_b64 s[10:11], -1
; GCN-NEXT: s_cbranch_vccnz BB1_6
; GCN-NEXT: ; %bb.3: ; %LeafBlock1
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
@@ -223,15 +221,11 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
; GCN-NEXT: BB1_5: ; %Flow3
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_mov_b64 s[10:11], 0
; GCN-NEXT: BB1_6: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_and_b64 vcc, exec, s[10:11]
; GCN-NEXT: s_cbranch_vccz BB1_1
; GCN-NEXT: s_branch BB1_7
; GCN-NEXT: BB1_6: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_mov_b64 s[8:9], 0
; GCN-NEXT: s_mov_b64 s[6:7], -1
; GCN-NEXT: s_and_b64 vcc, exec, -1
; GCN-NEXT: s_cbranch_execz BB1_1
; GCN-NEXT: BB1_7: ; %LeafBlock
; GCN-NEXT: ; %bb.7: ; %LeafBlock
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; GCN-NEXT: s_and_b64 vcc, exec, vcc
@@ -247,9 +241,10 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
; GCN-NEXT: s_and_b64 s[10:11], vcc, exec
; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11]
; GCN-NEXT: s_branch BB1_1
; GCN-NEXT: BB1_9: ; %Flow6
; GCN-NEXT: BB1_9: ; %loop.exit.guard
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: s_and_saveexec_b64 s[0:1], s[4:5]
; GCN-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GCN-NEXT: s_endpgm
bb:
%id = call i32 @llvm.amdgcn.workitem.id.x()
@@ -14,28 +14,36 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(i64 addrspace(3)* noca
; GCN-NEXT: s_load_dword s0, s[0:1], 0x9
; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: s_mov_b64 s[2:3], -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_add_i32_e32 v0, vcc, s0, v0
; GCN-NEXT: ds_read_b64 v[0:1], v0
; GCN-NEXT: s_mov_b32 s0, 0
; GCN-NEXT: s_and_b64 vcc, exec, 0
; GCN-NEXT: BB0_1: ; %bb5
; GCN-NEXT: s_and_b64 s[0:1], exec, -1
; GCN-NEXT: s_branch BB0_2
; GCN-NEXT: BB0_1: ; %bb10
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: s_andn2_b64 vcc, exec, s[2:3]
; GCN-NEXT: s_cbranch_vccz BB0_4
; GCN-NEXT: BB0_2: ; %bb5
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_cmp_lg_u32 s0, 1
; GCN-NEXT: s_cbranch_scc0 BB0_3
; GCN-NEXT: ; %bb.2: ; %bb10
; GCN-NEXT: ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT: ; implicit-def: $sgpr0
; GCN-NEXT: s_mov_b64 vcc, s[0:1]
; GCN-NEXT: s_cbranch_vccnz BB0_1
; GCN-NEXT: s_branch BB0_5
; GCN-NEXT: BB0_3: ; %bb8
; GCN-NEXT: ; %bb.3: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: s_mov_b64 s[4:5], -1
; GCN-NEXT: s_andn2_b64 vcc, exec, s[2:3]
; GCN-NEXT: s_cbranch_vccnz BB0_2
; GCN-NEXT: BB0_4: ; %loop.exit.guard
; GCN-NEXT: s_and_b64 vcc, exec, s[4:5]
; GCN-NEXT: s_cbranch_vccz BB0_7
; GCN-NEXT: ; %bb.5: ; %bb8
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: ds_read_b32 v0, v0
; GCN-NEXT: s_and_b64 vcc, exec, 0
; GCN-NEXT: BB0_4: ; %bb9
; GCN-NEXT: BB0_6: ; %bb9
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_cbranch_vccz BB0_4
; GCN-NEXT: BB0_5: ; %DummyReturnBlock
; GCN-NEXT: s_cbranch_vccz BB0_6
; GCN-NEXT: BB0_7: ; %DummyReturnBlock
; GCN-NEXT: s_endpgm
; IR-LABEL: @reduced_nested_loop_conditions(
; IR-NEXT: bb:
@@ -84,7 +92,6 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(i64 addrspace(3)* noca
; IR: bb23:
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP6]])
; IR-NEXT: ret void
;
bb:
%my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%my.tmp1 = getelementptr inbounds i64, i64 addrspace(3)* %arg, i32 %my.tmp
@@ -268,7 +275,6 @@ define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %a
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP7]])
; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef
; IR-NEXT: ret void
;
bb:
%my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%my.tmp1 = zext i32 %my.tmp to i64
@@ -166,45 +166,72 @@ define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: buffer_load_dword v0, off, s[4:7], 0
; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
; SI-NEXT: s_load_dword s4, s[0:1], 0xc
; SI-NEXT: s_brev_b32 s5, 44
; SI-NEXT: s_load_dword s8, s[0:1], 0xc
; SI-NEXT: s_brev_b32 s9, 44
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_gt_i32_e64 s[0:1], s2, 0
; SI-NEXT: v_cmp_lt_i32_e64 s[2:3], s3, 4
; SI-NEXT: s_or_b64 s[8:9], s[0:1], s[2:3]
; SI-NEXT: s_and_b64 s[0:1], exec, s[2:3]
; SI-NEXT: s_and_b64 s[2:3], exec, s[8:9]
; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s2, 1
; SI-NEXT: v_cmp_lt_i32_e64 s[4:5], s3, 4
; SI-NEXT: v_cmp_gt_i32_e64 s[2:3], s3, 3
; SI-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3]
; SI-NEXT: s_and_b64 s[0:1], exec, s[4:5]
; SI-NEXT: s_and_b64 s[2:3], exec, s[2:3]
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_cmp_lt_f32_e64 s[8:9], |v0|, s5
; SI-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s9
; SI-NEXT: s_and_b64 s[4:5], exec, s[4:5]
; SI-NEXT: v_mov_b32_e32 v0, 3
; SI-NEXT: BB3_1: ; %while.cond
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_mov_b64 vcc, s[0:1]
; SI-NEXT: s_cbranch_vccz BB3_5
; SI-NEXT: ; %bb.2: ; %convex.exit
; SI-NEXT: ; in Loop: Header=BB3_1 Depth=1
; SI-NEXT: s_mov_b64 vcc, s[2:3]
; SI-NEXT: s_branch BB3_4
; SI-NEXT: BB3_1: ; %Flow6
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[10:11], 0
; SI-NEXT: BB3_2: ; %Flow5
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[14:15], 0
; SI-NEXT: BB3_3: ; %Flow
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_and_b64 vcc, exec, s[12:13]
; SI-NEXT: s_cbranch_vccnz BB3_8
; SI-NEXT: ; %bb.3: ; %if.end
; SI-NEXT: ; in Loop: Header=BB3_1 Depth=1
; SI-NEXT: s_andn2_b64 vcc, exec, s[8:9]
; SI-NEXT: s_cbranch_vccnz BB3_1
; SI-NEXT: ; %bb.4: ; %if.else
; SI-NEXT: ; in Loop: Header=BB3_1 Depth=1
; SI-NEXT: BB3_4: ; %while.cond
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_mov_b64 s[14:15], -1
; SI-NEXT: s_mov_b64 s[10:11], -1
; SI-NEXT: s_mov_b64 s[12:13], -1
; SI-NEXT: s_mov_b64 vcc, s[0:1]
; SI-NEXT: s_cbranch_vccz BB3_3
; SI-NEXT: ; %bb.5: ; %convex.exit
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[10:11], -1
; SI-NEXT: s_mov_b64 s[12:13], -1
; SI-NEXT: s_mov_b64 vcc, s[2:3]
; SI-NEXT: s_cbranch_vccz BB3_2
; SI-NEXT: ; %bb.6: ; %if.end
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[12:13], -1
; SI-NEXT: s_mov_b64 vcc, s[4:5]
; SI-NEXT: s_cbranch_vccz BB3_1
; SI-NEXT: ; %bb.7: ; %if.else
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[12:13], 0
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_branch BB3_1
; SI-NEXT: BB3_5: ; %for.cond.preheader
; SI-NEXT: BB3_8: ; %loop.exit.guard4
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_and_b64 vcc, exec, s[10:11]
; SI-NEXT: s_cbranch_vccz BB3_4
; SI-NEXT: ; %bb.9: ; %loop.exit.guard
; SI-NEXT: s_and_b64 vcc, exec, s[14:15]
; SI-NEXT: s_cbranch_vccz BB3_13
; SI-NEXT: ; %bb.10: ; %for.cond.preheader
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, 0x3e8
; SI-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
; SI-NEXT: v_cmp_lt_i32_e32 vcc, s8, v0
; SI-NEXT: s_and_b64 vcc, exec, vcc
; SI-NEXT: s_cbranch_vccz BB3_8
; SI-NEXT: ; %bb.6: ; %for.body
; SI-NEXT: s_cbranch_vccz BB3_13
; SI-NEXT: ; %bb.11: ; %for.body
; SI-NEXT: s_and_b64 vcc, exec, 0
; SI-NEXT: BB3_7: ; %self.loop
; SI-NEXT: BB3_12: ; %self.loop
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_cbranch_vccz BB3_7
; SI-NEXT: BB3_8: ; %DummyReturnBlock
; SI-NEXT: s_cbranch_vccz BB3_12
; SI-NEXT: BB3_13: ; %DummyReturnBlock
; SI-NEXT: s_endpgm
;
; FLAT-LABEL: loop_land_info_assert:
@@ -213,44 +240,71 @@ define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32
; FLAT-NEXT: s_mov_b32 s6, -1
; FLAT-NEXT: buffer_load_dword v0, off, s[4:7], 0
; FLAT-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; FLAT-NEXT: s_load_dword s4, s[0:1], 0x30
; FLAT-NEXT: s_brev_b32 s5, 44
; FLAT-NEXT: s_load_dword s8, s[0:1], 0x30
; FLAT-NEXT: s_brev_b32 s9, 44
; FLAT-NEXT: s_waitcnt lgkmcnt(0)
; FLAT-NEXT: v_cmp_gt_i32_e64 s[0:1], s2, 0
; FLAT-NEXT: v_cmp_lt_i32_e64 s[2:3], s3, 4
; FLAT-NEXT: s_or_b64 s[8:9], s[0:1], s[2:3]
; FLAT-NEXT: s_and_b64 s[0:1], exec, s[2:3]
; FLAT-NEXT: s_and_b64 s[2:3], exec, s[8:9]
; FLAT-NEXT: v_cmp_lt_i32_e64 s[0:1], s2, 1
; FLAT-NEXT: v_cmp_lt_i32_e64 s[4:5], s3, 4
; FLAT-NEXT: v_cmp_gt_i32_e64 s[2:3], s3, 3
; FLAT-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3]
; FLAT-NEXT: s_and_b64 s[0:1], exec, s[4:5]
; FLAT-NEXT: s_and_b64 s[2:3], exec, s[2:3]
; FLAT-NEXT: s_waitcnt vmcnt(0)
; FLAT-NEXT: v_cmp_lt_f32_e64 s[8:9], |v0|, s5
; FLAT-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s9
; FLAT-NEXT: s_and_b64 s[4:5], exec, s[4:5]
; FLAT-NEXT: v_mov_b32_e32 v0, 3
; FLAT-NEXT: BB3_1: ; %while.cond
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT: s_mov_b64 vcc, s[0:1]
; FLAT-NEXT: s_cbranch_vccz BB3_5
; FLAT-NEXT: ; %bb.2: ; %convex.exit
; FLAT-NEXT: ; in Loop: Header=BB3_1 Depth=1
; FLAT-NEXT: s_mov_b64 vcc, s[2:3]
; FLAT-NEXT: s_branch BB3_4
; FLAT-NEXT: BB3_1: ; %Flow6
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[10:11], 0
; FLAT-NEXT: BB3_2: ; %Flow5
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[14:15], 0
; FLAT-NEXT: BB3_3: ; %Flow
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_and_b64 vcc, exec, s[12:13]
; FLAT-NEXT: s_cbranch_vccnz BB3_8
; FLAT-NEXT: ; %bb.3: ; %if.end
; FLAT-NEXT: ; in Loop: Header=BB3_1 Depth=1
; FLAT-NEXT: s_andn2_b64 vcc, exec, s[8:9]
; FLAT-NEXT: s_cbranch_vccnz BB3_1
; FLAT-NEXT: ; %bb.4: ; %if.else
; FLAT-NEXT: ; in Loop: Header=BB3_1 Depth=1
; FLAT-NEXT: BB3_4: ; %while.cond
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT: s_mov_b64 s[14:15], -1
; FLAT-NEXT: s_mov_b64 s[10:11], -1
; FLAT-NEXT: s_mov_b64 s[12:13], -1
; FLAT-NEXT: s_mov_b64 vcc, s[0:1]
; FLAT-NEXT: s_cbranch_vccz BB3_3
; FLAT-NEXT: ; %bb.5: ; %convex.exit
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[10:11], -1
; FLAT-NEXT: s_mov_b64 s[12:13], -1
; FLAT-NEXT: s_mov_b64 vcc, s[2:3]
; FLAT-NEXT: s_cbranch_vccz BB3_2
; FLAT-NEXT: ; %bb.6: ; %if.end
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[12:13], -1
; FLAT-NEXT: s_mov_b64 vcc, s[4:5]
; FLAT-NEXT: s_cbranch_vccz BB3_1
; FLAT-NEXT: ; %bb.7: ; %if.else
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[12:13], 0
; FLAT-NEXT: buffer_store_dword v0, off, s[4:7], 0
; FLAT-NEXT: s_branch BB3_1
; FLAT-NEXT: BB3_5: ; %for.cond.preheader
; FLAT-NEXT: BB3_8: ; %loop.exit.guard4
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_and_b64 vcc, exec, s[10:11]
; FLAT-NEXT: s_cbranch_vccz BB3_4
; FLAT-NEXT: ; %bb.9: ; %loop.exit.guard
; FLAT-NEXT: s_and_b64 vcc, exec, s[14:15]
; FLAT-NEXT: s_cbranch_vccz BB3_13
; FLAT-NEXT: ; %bb.10: ; %for.cond.preheader
; FLAT-NEXT: v_mov_b32_e32 v0, 0x3e8
; FLAT-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
; FLAT-NEXT: v_cmp_lt_i32_e32 vcc, s8, v0
; FLAT-NEXT: s_and_b64 vcc, exec, vcc
; FLAT-NEXT: s_cbranch_vccz BB3_8
; FLAT-NEXT: ; %bb.6: ; %for.body
; FLAT-NEXT: s_cbranch_vccz BB3_13
; FLAT-NEXT: ; %bb.11: ; %for.body
; FLAT-NEXT: s_and_b64 vcc, exec, 0
; FLAT-NEXT: BB3_7: ; %self.loop
; FLAT-NEXT: BB3_12: ; %self.loop
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT: s_cbranch_vccz BB3_7
; FLAT-NEXT: BB3_8: ; %DummyReturnBlock
; FLAT-NEXT: s_cbranch_vccz BB3_12
; FLAT-NEXT: BB3_13: ; %DummyReturnBlock
; FLAT-NEXT: s_endpgm
entry:
%cmp = icmp sgt i32 %c0, 0