[AMDGPU] Enable structurizer workarounds by default

Reviewed By: nhaehnle

Differential Revision: https://reviews.llvm.org/D81211
This commit is contained in:
Sameer Sahasrabuddhe 2020-06-09 13:14:15 +05:30
parent 1b6602275d
commit d8f651d3e8
5 changed files with 167 additions and 120 deletions

View File

@ -195,7 +195,7 @@ static cl::opt<bool> EnableScalarIRPasses(
static cl::opt<bool> EnableStructurizerWorkarounds(
"amdgpu-enable-structurizer-workarounds",
-cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(false),
+cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
cl::Hidden);
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {

View File

@ -21,7 +21,6 @@ define amdgpu_kernel void @infinite_loop(i32 addrspace(1)* %out) {
; IR: loop:
; IR-NEXT: store volatile i32 999, i32 addrspace(1)* [[OUT:%.*]], align 4
; IR-NEXT: br label [[LOOP]]
;
entry:
br label %loop
@ -59,7 +58,6 @@ define amdgpu_kernel void @infinite_loop_ret(i32 addrspace(1)* %out) {
; IR-NEXT: br i1 true, label [[LOOP]], label [[UNIFIEDRETURNBLOCK]]
; IR: UnifiedReturnBlock:
; IR-NEXT: ret void
;
entry:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%cond = icmp eq i32 %tmp, 1
@ -119,7 +117,6 @@ define amdgpu_kernel void @infinite_loops(i32 addrspace(1)* %out) {
; IR-NEXT: br i1 true, label [[LOOP2]], label [[DUMMYRETURNBLOCK]]
; IR: DummyReturnBlock:
; IR-NEXT: ret void
;
entry:
br i1 undef, label %loop1, label %loop2
@ -140,33 +137,29 @@ define amdgpu_kernel void @infinite_loop_nest_ret(i32 addrspace(1)* %out) {
; SI-NEXT: s_cbranch_execz BB3_5
; SI-NEXT: ; %bb.1: ; %outer_loop.preheader
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
; SI-NEXT: v_cmp_ne_u32_e64 s[0:1], 3, v0
; SI-NEXT: s_mov_b64 s[2:3], 0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: BB3_2: ; %outer_loop
; SI-NEXT: ; =>This Loop Header: Depth=1
; SI-NEXT: ; Child Loop BB3_3 Depth 2
; SI-NEXT: s_and_b64 s[8:9], exec, vcc
; SI-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3]
; SI-NEXT: s_mov_b64 s[8:9], 0
; SI-NEXT: s_mov_b64 s[2:3], 0
; SI-NEXT: BB3_3: ; %inner_loop
; SI-NEXT: ; Parent Loop BB3_2 Depth=1
; SI-NEXT: ; => This Inner Loop Header: Depth=2
; SI-NEXT: s_and_b64 s[10:11], exec, s[0:1]
; SI-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9]
; SI-NEXT: s_and_b64 s[8:9], exec, s[0:1]
; SI-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3]
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_andn2_b64 exec, exec, s[8:9]
; SI-NEXT: s_cbranch_execnz BB3_3
; SI-NEXT: ; %bb.4: ; %Flow
; SI-NEXT: ; in Loop: Header=BB3_2 Depth=1
; SI-NEXT: s_or_b64 exec, exec, s[8:9]
; SI-NEXT: s_andn2_b64 exec, exec, s[2:3]
; SI-NEXT: s_cbranch_execnz BB3_2
; SI-NEXT: s_cbranch_execnz BB3_3
; SI-NEXT: ; %bb.4: ; %loop.exit.guard
; SI-NEXT: ; in Loop: Header=BB3_2 Depth=1
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
; SI-NEXT: s_and_b64 vcc, exec, 0
; SI-NEXT: s_cbranch_vccz BB3_2
; SI-NEXT: BB3_5: ; %UnifiedReturnBlock
; SI-NEXT: s_endpgm
; IR-LABEL: @infinite_loop_nest_ret(
@ -184,7 +177,6 @@ define amdgpu_kernel void @infinite_loop_nest_ret(i32 addrspace(1)* %out) {
; IR-NEXT: br i1 [[COND3]], label [[INNER_LOOP]], label [[OUTER_LOOP]]
; IR: UnifiedReturnBlock:
; IR-NEXT: ret void
;
entry:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%cond1 = icmp eq i32 %tmp, 1

View File

@ -46,52 +46,47 @@ define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) {
;
; GCN-LABEL: multi_else_break:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[2:3], 0
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_branch BB0_2
; GCN-NEXT: BB0_1: ; %Flow2
; GCN-NEXT: BB0_1: ; %loop.exit.guard
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: s_and_b64 s[0:1], exec, s[8:9]
; GCN-NEXT: s_or_b64 s[2:3], s[0:1], s[2:3]
; GCN-NEXT: s_andn2_b64 exec, exec, s[2:3]
; GCN-NEXT: s_and_b64 s[2:3], exec, s[2:3]
; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execz BB0_6
; GCN-NEXT: BB0_2: ; %LOOP.outer
; GCN-NEXT: ; =>This Loop Header: Depth=1
; GCN-NEXT: ; Child Loop BB0_4 Depth 2
; GCN-NEXT: ; implicit-def: $sgpr6_sgpr7
; GCN-NEXT: ; implicit-def: $sgpr8_sgpr9
; GCN-NEXT: ; implicit-def: $sgpr2_sgpr3
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: v_mov_b32_e32 v1, v0
; GCN-NEXT: s_branch BB0_4
; GCN-NEXT: BB0_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB0_4 Depth=2
; GCN-NEXT: s_or_b64 exec, exec, s[10:11]
; GCN-NEXT: s_and_b64 s[0:1], exec, s[6:7]
; GCN-NEXT: s_or_b64 s[4:5], s[0:1], s[4:5]
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
; GCN-NEXT: s_and_b64 s[8:9], exec, s[6:7]
; GCN-NEXT: s_or_b64 s[4:5], s[8:9], s[4:5]
; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5]
; GCN-NEXT: s_cbranch_execz BB0_1
; GCN-NEXT: BB0_4: ; %LOOP
; GCN-NEXT: ; Parent Loop BB0_2 Depth=1
; GCN-NEXT: ; => This Inner Loop Header: Depth=2
; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: v_add_i32_e32 v1, vcc, 1, v2
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, v2, v4
; GCN-NEXT: s_or_b64 s[8:9], s[8:9], exec
; GCN-NEXT: v_mov_b32_e32 v1, v0
; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v1
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, v1, v4
; GCN-NEXT: s_or_b64 s[2:3], s[2:3], exec
; GCN-NEXT: s_or_b64 s[6:7], s[6:7], exec
; GCN-NEXT: s_and_saveexec_b64 s[10:11], vcc
; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GCN-NEXT: s_cbranch_execz BB0_3
; GCN-NEXT: ; %bb.5: ; %ENDIF
; GCN-NEXT: ; in Loop: Header=BB0_4 Depth=2
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v5, v1
; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], v5, v1
; GCN-NEXT: s_andn2_b64 s[8:9], s[8:9], exec
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, v5, v0
; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; GCN-NEXT: s_andn2_b64 s[6:7], s[6:7], exec
; GCN-NEXT: s_and_b64 s[12:13], vcc, exec
; GCN-NEXT: s_and_b64 s[0:1], s[0:1], exec
; GCN-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13]
; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, v1
; GCN-NEXT: s_and_b64 s[10:11], vcc, exec
; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11]
; GCN-NEXT: s_branch BB0_3
; GCN-NEXT: BB0_6: ; %IF
; GCN-NEXT: s_endpgm
@ -204,7 +199,10 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 1, v1
; GCN-NEXT: s_mov_b64 s[6:7], -1
; GCN-NEXT: s_and_b64 vcc, exec, vcc
; GCN-NEXT: ; implicit-def: $sgpr8_sgpr9
; GCN-NEXT: s_mov_b64 s[10:11], -1
; GCN-NEXT: s_cbranch_vccnz BB1_6
; GCN-NEXT: ; %bb.3: ; %LeafBlock1
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
@ -223,15 +221,11 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
; GCN-NEXT: BB1_5: ; %Flow3
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_mov_b64 s[10:11], 0
; GCN-NEXT: BB1_6: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_and_b64 vcc, exec, s[10:11]
; GCN-NEXT: s_cbranch_vccz BB1_1
; GCN-NEXT: s_branch BB1_7
; GCN-NEXT: BB1_6: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_mov_b64 s[8:9], 0
; GCN-NEXT: s_mov_b64 s[6:7], -1
; GCN-NEXT: s_and_b64 vcc, exec, -1
; GCN-NEXT: s_cbranch_execz BB1_1
; GCN-NEXT: BB1_7: ; %LeafBlock
; GCN-NEXT: ; %bb.7: ; %LeafBlock
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; GCN-NEXT: s_and_b64 vcc, exec, vcc
@ -247,9 +241,10 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
; GCN-NEXT: s_and_b64 s[10:11], vcc, exec
; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11]
; GCN-NEXT: s_branch BB1_1
; GCN-NEXT: BB1_9: ; %Flow6
; GCN-NEXT: BB1_9: ; %loop.exit.guard
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: s_and_saveexec_b64 s[0:1], s[4:5]
; GCN-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GCN-NEXT: s_endpgm
bb:
%id = call i32 @llvm.amdgcn.workitem.id.x()

View File

@ -14,28 +14,36 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(i64 addrspace(3)* noca
; GCN-NEXT: s_load_dword s0, s[0:1], 0x9
; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: s_mov_b64 s[2:3], -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_add_i32_e32 v0, vcc, s0, v0
; GCN-NEXT: ds_read_b64 v[0:1], v0
; GCN-NEXT: s_mov_b32 s0, 0
; GCN-NEXT: s_and_b64 vcc, exec, 0
; GCN-NEXT: BB0_1: ; %bb5
; GCN-NEXT: s_and_b64 s[0:1], exec, -1
; GCN-NEXT: s_branch BB0_2
; GCN-NEXT: BB0_1: ; %bb10
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: s_andn2_b64 vcc, exec, s[2:3]
; GCN-NEXT: s_cbranch_vccz BB0_4
; GCN-NEXT: BB0_2: ; %bb5
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_cmp_lg_u32 s0, 1
; GCN-NEXT: s_cbranch_scc0 BB0_3
; GCN-NEXT: ; %bb.2: ; %bb10
; GCN-NEXT: ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT: ; implicit-def: $sgpr0
; GCN-NEXT: s_mov_b64 vcc, s[0:1]
; GCN-NEXT: s_cbranch_vccnz BB0_1
; GCN-NEXT: s_branch BB0_5
; GCN-NEXT: BB0_3: ; %bb8
; GCN-NEXT: ; %bb.3: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: s_mov_b64 s[4:5], -1
; GCN-NEXT: s_andn2_b64 vcc, exec, s[2:3]
; GCN-NEXT: s_cbranch_vccnz BB0_2
; GCN-NEXT: BB0_4: ; %loop.exit.guard
; GCN-NEXT: s_and_b64 vcc, exec, s[4:5]
; GCN-NEXT: s_cbranch_vccz BB0_7
; GCN-NEXT: ; %bb.5: ; %bb8
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: ds_read_b32 v0, v0
; GCN-NEXT: s_and_b64 vcc, exec, 0
; GCN-NEXT: BB0_4: ; %bb9
; GCN-NEXT: BB0_6: ; %bb9
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_cbranch_vccz BB0_4
; GCN-NEXT: BB0_5: ; %DummyReturnBlock
; GCN-NEXT: s_cbranch_vccz BB0_6
; GCN-NEXT: BB0_7: ; %DummyReturnBlock
; GCN-NEXT: s_endpgm
; IR-LABEL: @reduced_nested_loop_conditions(
; IR-NEXT: bb:
@ -84,7 +92,6 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(i64 addrspace(3)* noca
; IR: bb23:
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP6]])
; IR-NEXT: ret void
;
bb:
%my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%my.tmp1 = getelementptr inbounds i64, i64 addrspace(3)* %arg, i32 %my.tmp
@ -268,7 +275,6 @@ define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %a
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP7]])
; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef
; IR-NEXT: ret void
;
bb:
%my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%my.tmp1 = zext i32 %my.tmp to i64

View File

@ -166,45 +166,72 @@ define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: buffer_load_dword v0, off, s[4:7], 0
; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
; SI-NEXT: s_load_dword s4, s[0:1], 0xc
; SI-NEXT: s_brev_b32 s5, 44
; SI-NEXT: s_load_dword s8, s[0:1], 0xc
; SI-NEXT: s_brev_b32 s9, 44
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_gt_i32_e64 s[0:1], s2, 0
; SI-NEXT: v_cmp_lt_i32_e64 s[2:3], s3, 4
; SI-NEXT: s_or_b64 s[8:9], s[0:1], s[2:3]
; SI-NEXT: s_and_b64 s[0:1], exec, s[2:3]
; SI-NEXT: s_and_b64 s[2:3], exec, s[8:9]
; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s2, 1
; SI-NEXT: v_cmp_lt_i32_e64 s[4:5], s3, 4
; SI-NEXT: v_cmp_gt_i32_e64 s[2:3], s3, 3
; SI-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3]
; SI-NEXT: s_and_b64 s[0:1], exec, s[4:5]
; SI-NEXT: s_and_b64 s[2:3], exec, s[2:3]
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_cmp_lt_f32_e64 s[8:9], |v0|, s5
; SI-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s9
; SI-NEXT: s_and_b64 s[4:5], exec, s[4:5]
; SI-NEXT: v_mov_b32_e32 v0, 3
; SI-NEXT: BB3_1: ; %while.cond
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_mov_b64 vcc, s[0:1]
; SI-NEXT: s_cbranch_vccz BB3_5
; SI-NEXT: ; %bb.2: ; %convex.exit
; SI-NEXT: ; in Loop: Header=BB3_1 Depth=1
; SI-NEXT: s_mov_b64 vcc, s[2:3]
; SI-NEXT: s_branch BB3_4
; SI-NEXT: BB3_1: ; %Flow6
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[10:11], 0
; SI-NEXT: BB3_2: ; %Flow5
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[14:15], 0
; SI-NEXT: BB3_3: ; %Flow
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_and_b64 vcc, exec, s[12:13]
; SI-NEXT: s_cbranch_vccnz BB3_8
; SI-NEXT: ; %bb.3: ; %if.end
; SI-NEXT: ; in Loop: Header=BB3_1 Depth=1
; SI-NEXT: s_andn2_b64 vcc, exec, s[8:9]
; SI-NEXT: s_cbranch_vccnz BB3_1
; SI-NEXT: ; %bb.4: ; %if.else
; SI-NEXT: ; in Loop: Header=BB3_1 Depth=1
; SI-NEXT: BB3_4: ; %while.cond
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_mov_b64 s[14:15], -1
; SI-NEXT: s_mov_b64 s[10:11], -1
; SI-NEXT: s_mov_b64 s[12:13], -1
; SI-NEXT: s_mov_b64 vcc, s[0:1]
; SI-NEXT: s_cbranch_vccz BB3_3
; SI-NEXT: ; %bb.5: ; %convex.exit
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[10:11], -1
; SI-NEXT: s_mov_b64 s[12:13], -1
; SI-NEXT: s_mov_b64 vcc, s[2:3]
; SI-NEXT: s_cbranch_vccz BB3_2
; SI-NEXT: ; %bb.6: ; %if.end
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[12:13], -1
; SI-NEXT: s_mov_b64 vcc, s[4:5]
; SI-NEXT: s_cbranch_vccz BB3_1
; SI-NEXT: ; %bb.7: ; %if.else
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[12:13], 0
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_branch BB3_1
; SI-NEXT: BB3_5: ; %for.cond.preheader
; SI-NEXT: BB3_8: ; %loop.exit.guard4
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_and_b64 vcc, exec, s[10:11]
; SI-NEXT: s_cbranch_vccz BB3_4
; SI-NEXT: ; %bb.9: ; %loop.exit.guard
; SI-NEXT: s_and_b64 vcc, exec, s[14:15]
; SI-NEXT: s_cbranch_vccz BB3_13
; SI-NEXT: ; %bb.10: ; %for.cond.preheader
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, 0x3e8
; SI-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
; SI-NEXT: v_cmp_lt_i32_e32 vcc, s8, v0
; SI-NEXT: s_and_b64 vcc, exec, vcc
; SI-NEXT: s_cbranch_vccz BB3_8
; SI-NEXT: ; %bb.6: ; %for.body
; SI-NEXT: s_cbranch_vccz BB3_13
; SI-NEXT: ; %bb.11: ; %for.body
; SI-NEXT: s_and_b64 vcc, exec, 0
; SI-NEXT: BB3_7: ; %self.loop
; SI-NEXT: BB3_12: ; %self.loop
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_cbranch_vccz BB3_7
; SI-NEXT: BB3_8: ; %DummyReturnBlock
; SI-NEXT: s_cbranch_vccz BB3_12
; SI-NEXT: BB3_13: ; %DummyReturnBlock
; SI-NEXT: s_endpgm
;
; FLAT-LABEL: loop_land_info_assert:
@ -213,44 +240,71 @@ define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32
; FLAT-NEXT: s_mov_b32 s6, -1
; FLAT-NEXT: buffer_load_dword v0, off, s[4:7], 0
; FLAT-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; FLAT-NEXT: s_load_dword s4, s[0:1], 0x30
; FLAT-NEXT: s_brev_b32 s5, 44
; FLAT-NEXT: s_load_dword s8, s[0:1], 0x30
; FLAT-NEXT: s_brev_b32 s9, 44
; FLAT-NEXT: s_waitcnt lgkmcnt(0)
; FLAT-NEXT: v_cmp_gt_i32_e64 s[0:1], s2, 0
; FLAT-NEXT: v_cmp_lt_i32_e64 s[2:3], s3, 4
; FLAT-NEXT: s_or_b64 s[8:9], s[0:1], s[2:3]
; FLAT-NEXT: s_and_b64 s[0:1], exec, s[2:3]
; FLAT-NEXT: s_and_b64 s[2:3], exec, s[8:9]
; FLAT-NEXT: v_cmp_lt_i32_e64 s[0:1], s2, 1
; FLAT-NEXT: v_cmp_lt_i32_e64 s[4:5], s3, 4
; FLAT-NEXT: v_cmp_gt_i32_e64 s[2:3], s3, 3
; FLAT-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3]
; FLAT-NEXT: s_and_b64 s[0:1], exec, s[4:5]
; FLAT-NEXT: s_and_b64 s[2:3], exec, s[2:3]
; FLAT-NEXT: s_waitcnt vmcnt(0)
; FLAT-NEXT: v_cmp_lt_f32_e64 s[8:9], |v0|, s5
; FLAT-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s9
; FLAT-NEXT: s_and_b64 s[4:5], exec, s[4:5]
; FLAT-NEXT: v_mov_b32_e32 v0, 3
; FLAT-NEXT: BB3_1: ; %while.cond
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT: s_mov_b64 vcc, s[0:1]
; FLAT-NEXT: s_cbranch_vccz BB3_5
; FLAT-NEXT: ; %bb.2: ; %convex.exit
; FLAT-NEXT: ; in Loop: Header=BB3_1 Depth=1
; FLAT-NEXT: s_mov_b64 vcc, s[2:3]
; FLAT-NEXT: s_branch BB3_4
; FLAT-NEXT: BB3_1: ; %Flow6
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[10:11], 0
; FLAT-NEXT: BB3_2: ; %Flow5
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[14:15], 0
; FLAT-NEXT: BB3_3: ; %Flow
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_and_b64 vcc, exec, s[12:13]
; FLAT-NEXT: s_cbranch_vccnz BB3_8
; FLAT-NEXT: ; %bb.3: ; %if.end
; FLAT-NEXT: ; in Loop: Header=BB3_1 Depth=1
; FLAT-NEXT: s_andn2_b64 vcc, exec, s[8:9]
; FLAT-NEXT: s_cbranch_vccnz BB3_1
; FLAT-NEXT: ; %bb.4: ; %if.else
; FLAT-NEXT: ; in Loop: Header=BB3_1 Depth=1
; FLAT-NEXT: BB3_4: ; %while.cond
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT: s_mov_b64 s[14:15], -1
; FLAT-NEXT: s_mov_b64 s[10:11], -1
; FLAT-NEXT: s_mov_b64 s[12:13], -1
; FLAT-NEXT: s_mov_b64 vcc, s[0:1]
; FLAT-NEXT: s_cbranch_vccz BB3_3
; FLAT-NEXT: ; %bb.5: ; %convex.exit
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[10:11], -1
; FLAT-NEXT: s_mov_b64 s[12:13], -1
; FLAT-NEXT: s_mov_b64 vcc, s[2:3]
; FLAT-NEXT: s_cbranch_vccz BB3_2
; FLAT-NEXT: ; %bb.6: ; %if.end
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[12:13], -1
; FLAT-NEXT: s_mov_b64 vcc, s[4:5]
; FLAT-NEXT: s_cbranch_vccz BB3_1
; FLAT-NEXT: ; %bb.7: ; %if.else
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[12:13], 0
; FLAT-NEXT: buffer_store_dword v0, off, s[4:7], 0
; FLAT-NEXT: s_branch BB3_1
; FLAT-NEXT: BB3_5: ; %for.cond.preheader
; FLAT-NEXT: BB3_8: ; %loop.exit.guard4
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_and_b64 vcc, exec, s[10:11]
; FLAT-NEXT: s_cbranch_vccz BB3_4
; FLAT-NEXT: ; %bb.9: ; %loop.exit.guard
; FLAT-NEXT: s_and_b64 vcc, exec, s[14:15]
; FLAT-NEXT: s_cbranch_vccz BB3_13
; FLAT-NEXT: ; %bb.10: ; %for.cond.preheader
; FLAT-NEXT: v_mov_b32_e32 v0, 0x3e8
; FLAT-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
; FLAT-NEXT: v_cmp_lt_i32_e32 vcc, s8, v0
; FLAT-NEXT: s_and_b64 vcc, exec, vcc
; FLAT-NEXT: s_cbranch_vccz BB3_8
; FLAT-NEXT: ; %bb.6: ; %for.body
; FLAT-NEXT: s_cbranch_vccz BB3_13
; FLAT-NEXT: ; %bb.11: ; %for.body
; FLAT-NEXT: s_and_b64 vcc, exec, 0
; FLAT-NEXT: BB3_7: ; %self.loop
; FLAT-NEXT: BB3_12: ; %self.loop
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT: s_cbranch_vccz BB3_7
; FLAT-NEXT: BB3_8: ; %DummyReturnBlock
; FLAT-NEXT: s_cbranch_vccz BB3_12
; FLAT-NEXT: BB3_13: ; %DummyReturnBlock
; FLAT-NEXT: s_endpgm
entry:
%cmp = icmp sgt i32 %c0, 0