forked from OSchip/llvm-project
[AMDGPU] Do not generate ELF symbols for the local branch target labels
The compiler was generating symbols in the final code object for local branch target labels. These symbols bloat the code object and slow down the loader, and they are only used to simplify disassembly. Use '--symbolize-operands' with llvm-objdump to improve readability of the branch target operands in disassembly. Fixes: SWDEV-312223 Reviewed By: scott.linder Differential Revision: https://reviews.llvm.org/D114273
This commit is contained in:
parent
43f5f6916f
commit
18f9351223
|
@ -28,7 +28,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT,
|
|||
MaxInstLength = (TT.getArch() == Triple::amdgcn) ? 20 : 16;
|
||||
SeparatorString = "\n";
|
||||
CommentString = ";";
|
||||
PrivateLabelPrefix = "";
|
||||
InlineAsmStart = ";#ASMSTART";
|
||||
InlineAsmEnd = ";#ASMEND";
|
||||
|
||||
|
|
|
@ -34,13 +34,13 @@ define amdgpu_cs void @atomic_add(<4 x i32> inreg %arg) {
|
|||
; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s5, v0
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
|
||||
; GCN-NEXT: s_cbranch_execz BB0_2
|
||||
; GCN-NEXT: s_cbranch_execz .LBB0_2
|
||||
; GCN-NEXT: ; %bb.1:
|
||||
; GCN-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GCN-NEXT: buffer_atomic_add v0, v1, s[0:3], 0 idxen glc
|
||||
; GCN-NEXT: BB0_2:
|
||||
; GCN-NEXT: .LBB0_2:
|
||||
; GCN-NEXT: s_endpgm
|
||||
.entry:
|
||||
call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 1, <4 x i32> %arg, i32 0, i32 0, i32 0, i32 0)
|
||||
|
@ -78,13 +78,13 @@ define amdgpu_cs void @atomic_add_and_format(<4 x i32> inreg %arg) {
|
|||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GCN-NEXT: ; implicit-def: $vgpr1
|
||||
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GCN-NEXT: s_cbranch_execz BB1_2
|
||||
; GCN-NEXT: s_cbranch_execz .LBB1_2
|
||||
; GCN-NEXT: ; %bb.1:
|
||||
; GCN-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s6
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GCN-NEXT: buffer_atomic_add v1, v2, s[0:3], 0 idxen glc
|
||||
; GCN-NEXT: BB1_2:
|
||||
; GCN-NEXT: .LBB1_2:
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_readfirstlane_b32 s4, v1
|
||||
|
@ -128,13 +128,13 @@ define amdgpu_cs void @atomic_sub(<4 x i32> inreg %arg) {
|
|||
; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s5, v0
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
|
||||
; GCN-NEXT: s_cbranch_execz BB2_2
|
||||
; GCN-NEXT: s_cbranch_execz .LBB2_2
|
||||
; GCN-NEXT: ; %bb.1:
|
||||
; GCN-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GCN-NEXT: buffer_atomic_sub v0, v1, s[0:3], 0 idxen glc
|
||||
; GCN-NEXT: BB2_2:
|
||||
; GCN-NEXT: .LBB2_2:
|
||||
; GCN-NEXT: s_endpgm
|
||||
.entry:
|
||||
call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32 1, <4 x i32> %arg, i32 0, i32 0, i32 0, i32 0)
|
||||
|
@ -172,13 +172,13 @@ define amdgpu_cs void @atomic_sub_and_format(<4 x i32> inreg %arg) {
|
|||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GCN-NEXT: ; implicit-def: $vgpr1
|
||||
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GCN-NEXT: s_cbranch_execz BB3_2
|
||||
; GCN-NEXT: s_cbranch_execz .LBB3_2
|
||||
; GCN-NEXT: ; %bb.1:
|
||||
; GCN-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s6
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GCN-NEXT: buffer_atomic_sub v1, v2, s[0:3], 0 idxen glc
|
||||
; GCN-NEXT: BB3_2:
|
||||
; GCN-NEXT: .LBB3_2:
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_readfirstlane_b32 s4, v1
|
||||
|
@ -223,14 +223,14 @@ define amdgpu_cs void @atomic_xor(<4 x i32> inreg %arg) {
|
|||
; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s5, v0
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
|
||||
; GCN-NEXT: s_cbranch_execz BB4_2
|
||||
; GCN-NEXT: s_cbranch_execz .LBB4_2
|
||||
; GCN-NEXT: ; %bb.1:
|
||||
; GCN-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
|
||||
; GCN-NEXT: s_and_b32 s4, s4, 1
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GCN-NEXT: buffer_atomic_xor v0, v1, s[0:3], 0 idxen glc
|
||||
; GCN-NEXT: BB4_2:
|
||||
; GCN-NEXT: .LBB4_2:
|
||||
; GCN-NEXT: s_endpgm
|
||||
.entry:
|
||||
call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32 1, <4 x i32> %arg, i32 0, i32 0, i32 0, i32 0)
|
||||
|
@ -270,14 +270,14 @@ define amdgpu_cs void @atomic_xor_and_format(<4 x i32> inreg %arg) {
|
|||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GCN-NEXT: ; implicit-def: $vgpr1
|
||||
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GCN-NEXT: s_cbranch_execz BB5_2
|
||||
; GCN-NEXT: s_cbranch_execz .LBB5_2
|
||||
; GCN-NEXT: ; %bb.1:
|
||||
; GCN-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
|
||||
; GCN-NEXT: s_and_b32 s6, s6, 1
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s6
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GCN-NEXT: buffer_atomic_xor v1, v2, s[0:3], 0 idxen glc
|
||||
; GCN-NEXT: BB5_2:
|
||||
; GCN-NEXT: .LBB5_2:
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_readfirstlane_b32 s4, v1
|
||||
|
|
|
@ -55,12 +55,12 @@ define amdgpu_kernel void @sgpr_trunc_brcond(i32 %cond) {
|
|||
; GCN-NEXT: s_xor_b32 s0, s0, -1
|
||||
; GCN-NEXT: s_and_b32 s0, s0, 1
|
||||
; GCN-NEXT: s_cmp_lg_u32 s0, 0
|
||||
; GCN-NEXT: s_cbranch_scc1 BB3_2
|
||||
; GCN-NEXT: s_cbranch_scc1 .LBB3_2
|
||||
; GCN-NEXT: ; %bb.1: ; %bb0
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: flat_store_dword v[0:1], v0
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: BB3_2: ; %bb1
|
||||
; GCN-NEXT: .LBB3_2: ; %bb1
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 1
|
||||
; GCN-NEXT: flat_store_dword v[0:1], v0
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -86,12 +86,12 @@ define amdgpu_kernel void @brcond_sgpr_trunc_and(i32 %cond0, i32 %cond1) {
|
|||
; GCN-NEXT: s_xor_b32 s0, s0, -1
|
||||
; GCN-NEXT: s_and_b32 s0, s0, 1
|
||||
; GCN-NEXT: s_cmp_lg_u32 s0, 0
|
||||
; GCN-NEXT: s_cbranch_scc1 BB4_2
|
||||
; GCN-NEXT: s_cbranch_scc1 .LBB4_2
|
||||
; GCN-NEXT: ; %bb.1: ; %bb0
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: flat_store_dword v[0:1], v0
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: BB4_2: ; %bb1
|
||||
; GCN-NEXT: .LBB4_2: ; %bb1
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 1
|
||||
; GCN-NEXT: flat_store_dword v[0:1], v0
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
|
|
|
@ -10,11 +10,11 @@ define i32 @divergent_if_swap_brtarget_order0(i32 %value) {
|
|||
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_cbranch_execz BB0_2
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB0_2
|
||||
; CHECK-NEXT: ; %bb.1: ; %if.true
|
||||
; CHECK-NEXT: global_load_dword v0, v[0:1], off glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: BB0_2: ; %endif
|
||||
; CHECK-NEXT: .LBB0_2: ; %endif
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
|
@ -38,11 +38,11 @@ define i32 @divergent_if_swap_brtarget_order1(i32 %value) {
|
|||
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_cbranch_execz BB1_2
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB1_2
|
||||
; CHECK-NEXT: ; %bb.1: ; %if.true
|
||||
; CHECK-NEXT: global_load_dword v0, v[0:1], off glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: BB1_2: ; %endif
|
||||
; CHECK-NEXT: .LBB1_2: ; %endif
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
|
@ -68,11 +68,11 @@ define i32 @divergent_if_nonboolean_condition0(i32 %value) {
|
|||
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_cbranch_execz BB2_2
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB2_2
|
||||
; CHECK-NEXT: ; %bb.1: ; %if.true
|
||||
; CHECK-NEXT: global_load_dword v0, v[0:1], off glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: BB2_2: ; %endif
|
||||
; CHECK-NEXT: .LBB2_2: ; %endif
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
|
@ -100,11 +100,11 @@ define i32 @divergent_if_nonboolean_condition1(i32 addrspace(1)* %ptr) {
|
|||
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_cbranch_execz BB3_2
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB3_2
|
||||
; CHECK-NEXT: ; %bb.1: ; %if.true
|
||||
; CHECK-NEXT: global_load_dword v0, v[0:1], off glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: BB3_2: ; %endif
|
||||
; CHECK-NEXT: .LBB3_2: ; %endif
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
|
@ -139,7 +139,7 @@ define void @constrained_if_register_class() {
|
|||
; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; CHECK-NEXT: s_cbranch_scc1 BB4_4
|
||||
; CHECK-NEXT: s_cbranch_scc1 .LBB4_4
|
||||
; CHECK-NEXT: ; %bb.1: ; %bb2
|
||||
; CHECK-NEXT: s_getpc_b64 s[6:7]
|
||||
; CHECK-NEXT: s_add_u32 s6, s6, const.ptr@gotpcrel32@lo+4
|
||||
|
@ -153,15 +153,15 @@ define void @constrained_if_register_class() {
|
|||
; CHECK-NEXT: global_load_dword v0, v0, s[6:7]
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, 1.0, v0
|
||||
; CHECK-NEXT: s_cbranch_vccnz BB4_3
|
||||
; CHECK-NEXT: s_cbranch_vccnz .LBB4_3
|
||||
; CHECK-NEXT: ; %bb.2: ; %bb7
|
||||
; CHECK-NEXT: s_mov_b32 s4, 0
|
||||
; CHECK-NEXT: BB4_3: ; %bb8
|
||||
; CHECK-NEXT: .LBB4_3: ; %bb8
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; CHECK-NEXT: s_cbranch_scc0 BB4_5
|
||||
; CHECK-NEXT: BB4_4: ; %bb12
|
||||
; CHECK-NEXT: s_cbranch_scc0 .LBB4_5
|
||||
; CHECK-NEXT: .LBB4_4: ; %bb12
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
; CHECK-NEXT: BB4_5: ; %bb11
|
||||
; CHECK-NEXT: .LBB4_5: ; %bb11
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 4.0
|
||||
; CHECK-NEXT: buffer_store_dword v0, v0, s[0:3], 0 offen
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -202,26 +202,26 @@ define amdgpu_kernel void @break_loop(i32 %arg) {
|
|||
; CHECK-NEXT: ; implicit-def: $vgpr1
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: v_subrev_u32_e32 v0, s2, v0
|
||||
; CHECK-NEXT: s_branch BB5_2
|
||||
; CHECK-NEXT: BB5_1: ; %Flow
|
||||
; CHECK-NEXT: s_branch .LBB5_2
|
||||
; CHECK-NEXT: .LBB5_1: ; %Flow
|
||||
; CHECK-NEXT: ; in Loop: Header=BB5_2 Depth=1
|
||||
; CHECK-NEXT: s_and_b64 s[2:3], exec, s[2:3]
|
||||
; CHECK-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
|
||||
; CHECK-NEXT: s_andn2_b64 exec, exec, s[0:1]
|
||||
; CHECK-NEXT: s_cbranch_execz BB5_4
|
||||
; CHECK-NEXT: BB5_2: ; %bb1
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB5_4
|
||||
; CHECK-NEXT: .LBB5_2: ; %bb1
|
||||
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: v_add_u32_e32 v1, 1, v1
|
||||
; CHECK-NEXT: v_cmp_le_i32_e32 vcc, 0, v1
|
||||
; CHECK-NEXT: s_mov_b64 s[2:3], -1
|
||||
; CHECK-NEXT: s_cbranch_vccnz BB5_1
|
||||
; CHECK-NEXT: s_cbranch_vccnz .LBB5_1
|
||||
; CHECK-NEXT: ; %bb.3: ; %bb4
|
||||
; CHECK-NEXT: ; in Loop: Header=BB5_2 Depth=1
|
||||
; CHECK-NEXT: global_load_dword v2, v[0:1], off glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: v_cmp_ge_i32_e64 s[2:3], v0, v2
|
||||
; CHECK-NEXT: s_branch BB5_1
|
||||
; CHECK-NEXT: BB5_4: ; %bb9
|
||||
; CHECK-NEXT: s_branch .LBB5_1
|
||||
; CHECK-NEXT: .LBB5_4: ; %bb9
|
||||
; CHECK-NEXT: s_endpgm
|
||||
bb:
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
|
|
@ -949,7 +949,7 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out
|
|||
; GFX7-NEXT: s_mov_b32 s6, 0
|
||||
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX7-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX7-NEXT: s_cbranch_execz BB13_2
|
||||
; GFX7-NEXT: s_cbranch_execz .LBB13_2
|
||||
; GFX7-NEXT: ; %bb.1: ; %bb
|
||||
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x1d
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -957,7 +957,7 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out
|
|||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: s_cmp_lg_u32 s0, 0
|
||||
; GFX7-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; GFX7-NEXT: BB13_2: ; %exit
|
||||
; GFX7-NEXT: .LBB13_2: ; %exit
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX7-NEXT: s_and_b32 s0, 1, s6
|
||||
; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
|
||||
|
@ -983,7 +983,7 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out
|
|||
; GFX8-NEXT: flat_load_dwordx3 v[1:3], v[1:2]
|
||||
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX8-NEXT: s_cbranch_execz BB13_2
|
||||
; GFX8-NEXT: s_cbranch_execz .LBB13_2
|
||||
; GFX8-NEXT: ; %bb.1: ; %bb
|
||||
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x74
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -991,7 +991,7 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out
|
|||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: s_cmp_lg_u32 s0, 0
|
||||
; GFX8-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; GFX8-NEXT: BB13_2: ; %exit
|
||||
; GFX8-NEXT: .LBB13_2: ; %exit
|
||||
; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX8-NEXT: s_add_u32 s0, s2, 8
|
||||
; GFX8-NEXT: s_addc_u32 s1, s3, 0
|
||||
|
@ -1016,7 +1016,7 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out
|
|||
; GFX10_W32-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
|
||||
; GFX10_W32-NEXT: s_and_saveexec_b32 s4, vcc_lo
|
||||
; GFX10_W32-NEXT: s_cbranch_execz BB13_2
|
||||
; GFX10_W32-NEXT: s_cbranch_execz .LBB13_2
|
||||
; GFX10_W32-NEXT: ; %bb.1: ; %bb
|
||||
; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x74
|
||||
; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -1024,7 +1024,7 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out
|
|||
; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10_W32-NEXT: s_cmp_lg_u32 s0, 0
|
||||
; GFX10_W32-NEXT: s_cselect_b32 s5, 1, 0
|
||||
; GFX10_W32-NEXT: BB13_2: ; %exit
|
||||
; GFX10_W32-NEXT: .LBB13_2: ; %exit
|
||||
; GFX10_W32-NEXT: s_or_b32 exec_lo, exec_lo, s4
|
||||
; GFX10_W32-NEXT: s_and_b32 s0, 1, s5
|
||||
; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
|
||||
|
@ -1046,7 +1046,7 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out
|
|||
; GFX10_W64-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
|
||||
; GFX10_W64-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX10_W64-NEXT: s_cbranch_execz BB13_2
|
||||
; GFX10_W64-NEXT: s_cbranch_execz .LBB13_2
|
||||
; GFX10_W64-NEXT: ; %bb.1: ; %bb
|
||||
; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x74
|
||||
; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -1054,7 +1054,7 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out
|
|||
; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10_W64-NEXT: s_cmp_lg_u32 s0, 0
|
||||
; GFX10_W64-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; GFX10_W64-NEXT: BB13_2: ; %exit
|
||||
; GFX10_W64-NEXT: .LBB13_2: ; %exit
|
||||
; GFX10_W64-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX10_W64-NEXT: s_and_b32 s0, 1, s6
|
||||
; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
|
||||
|
|
|
@ -9,12 +9,12 @@ define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) {
|
|||
; GCN-NEXT: s_load_dword s0, s[4:5], 0x24
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_cmp_lg_u32 s1, 0
|
||||
; GCN-NEXT: s_cbranch_scc1 BB0_2
|
||||
; GCN-NEXT: s_cbranch_scc1 .LBB0_2
|
||||
; GCN-NEXT: ; %bb.1: ; %mid
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: global_store_dword v[0:1], v0, off
|
||||
; GCN-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GCN-NEXT: BB0_2: ; %bb
|
||||
; GCN-NEXT: .LBB0_2: ; %bb
|
||||
; GCN-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s0
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
|
|
|
@ -8,12 +8,12 @@ define amdgpu_kernel void @test_wave64(i32 %arg0, i64 %saved) {
|
|||
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_cmp_lg_u32 s2, 0
|
||||
; GCN-NEXT: s_cbranch_scc1 BB0_2
|
||||
; GCN-NEXT: s_cbranch_scc1 .LBB0_2
|
||||
; GCN-NEXT: ; %bb.1: ; %mid
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: global_store_dword v[0:1], v0, off
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: BB0_2: ; %bb
|
||||
; GCN-NEXT: .LBB0_2: ; %bb
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: global_store_dword v[0:1], v0, off
|
||||
|
|
|
@ -159,7 +159,7 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr,
|
|||
; GFX1030-NEXT: v_mov_b32_e32 v27, v11
|
||||
; GFX1030-NEXT: v_mov_b32_e32 v28, v12
|
||||
; GFX1030-NEXT: s_mov_b32 s1, exec_lo
|
||||
; GFX1030-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1030-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1030-NEXT: v_readfirstlane_b32 s4, v14
|
||||
; GFX1030-NEXT: v_readfirstlane_b32 s5, v15
|
||||
; GFX1030-NEXT: v_readfirstlane_b32 s6, v16
|
||||
|
@ -183,7 +183,7 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr,
|
|||
; GFX1030-NEXT: ; implicit-def: $vgpr28
|
||||
; GFX1030-NEXT: ; implicit-def: $vgpr14_vgpr15_vgpr16_vgpr17
|
||||
; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
||||
; GFX1030-NEXT: s_cbranch_execnz BB6_1
|
||||
; GFX1030-NEXT: s_cbranch_execnz .LBB6_1
|
||||
; GFX1030-NEXT: ; %bb.2:
|
||||
; GFX1030-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX1030-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -200,7 +200,7 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr,
|
|||
; GFX1013-NEXT: v_mov_b32_e32 v18, v14
|
||||
; GFX1013-NEXT: v_mov_b32_e32 v19, v15
|
||||
; GFX1013-NEXT: s_mov_b32 s1, exec_lo
|
||||
; GFX1013-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1013-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1013-NEXT: v_readfirstlane_b32 s4, v18
|
||||
; GFX1013-NEXT: v_readfirstlane_b32 s5, v19
|
||||
; GFX1013-NEXT: v_readfirstlane_b32 s6, v16
|
||||
|
@ -215,7 +215,7 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr,
|
|||
; GFX1013-NEXT: ; implicit-def: $vgpr14_vgpr15_vgpr16_vgpr17
|
||||
; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
||||
; GFX1013-NEXT: s_cbranch_execnz BB6_1
|
||||
; GFX1013-NEXT: s_cbranch_execnz .LBB6_1
|
||||
; GFX1013-NEXT: ; %bb.2:
|
||||
; GFX1013-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX1013-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -248,7 +248,7 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p
|
|||
; GFX1030-NEXT: v_and_or_b32 v19, v6, s0, v0
|
||||
; GFX1030-NEXT: v_and_or_b32 v20, v7, s0, v1
|
||||
; GFX1030-NEXT: v_lshl_or_b32 v21, v3, 16, v2
|
||||
; GFX1030-NEXT: BB7_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1030-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1030-NEXT: v_readfirstlane_b32 s4, v10
|
||||
; GFX1030-NEXT: v_readfirstlane_b32 s5, v11
|
||||
; GFX1030-NEXT: v_readfirstlane_b32 s6, v12
|
||||
|
@ -269,7 +269,7 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p
|
|||
; GFX1030-NEXT: ; implicit-def: $vgpr21
|
||||
; GFX1030-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13
|
||||
; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
||||
; GFX1030-NEXT: s_cbranch_execnz BB7_1
|
||||
; GFX1030-NEXT: s_cbranch_execnz .LBB7_1
|
||||
; GFX1030-NEXT: ; %bb.2:
|
||||
; GFX1030-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX1030-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -288,7 +288,7 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p
|
|||
; GFX1013-NEXT: v_and_or_b32 v5, v6, s0, v5
|
||||
; GFX1013-NEXT: v_and_or_b32 v6, v7, s0, v14
|
||||
; GFX1013-NEXT: v_lshl_or_b32 v7, v9, 16, v8
|
||||
; GFX1013-NEXT: BB7_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1013-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1013-NEXT: v_readfirstlane_b32 s4, v10
|
||||
; GFX1013-NEXT: v_readfirstlane_b32 s5, v11
|
||||
; GFX1013-NEXT: v_readfirstlane_b32 s6, v12
|
||||
|
@ -303,7 +303,7 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p
|
|||
; GFX1013-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13
|
||||
; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
||||
; GFX1013-NEXT: s_cbranch_execnz BB7_1
|
||||
; GFX1013-NEXT: s_cbranch_execnz .LBB7_1
|
||||
; GFX1013-NEXT: ; %bb.2:
|
||||
; GFX1013-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX1013-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -333,7 +333,7 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr
|
|||
; GFX1030-NEXT: v_mov_b32_e32 v29, v12
|
||||
; GFX1030-NEXT: v_mov_b32_e32 v30, v13
|
||||
; GFX1030-NEXT: s_mov_b32 s1, exec_lo
|
||||
; GFX1030-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1030-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1030-NEXT: v_readfirstlane_b32 s4, v15
|
||||
; GFX1030-NEXT: v_readfirstlane_b32 s5, v16
|
||||
; GFX1030-NEXT: v_readfirstlane_b32 s6, v17
|
||||
|
@ -358,7 +358,7 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr
|
|||
; GFX1030-NEXT: ; implicit-def: $vgpr30
|
||||
; GFX1030-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18
|
||||
; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
||||
; GFX1030-NEXT: s_cbranch_execnz BB8_1
|
||||
; GFX1030-NEXT: s_cbranch_execnz .LBB8_1
|
||||
; GFX1030-NEXT: ; %bb.2:
|
||||
; GFX1030-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX1030-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -375,7 +375,7 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr
|
|||
; GFX1013-NEXT: v_mov_b32_e32 v19, v15
|
||||
; GFX1013-NEXT: v_mov_b32_e32 v20, v16
|
||||
; GFX1013-NEXT: s_mov_b32 s1, exec_lo
|
||||
; GFX1013-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1013-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1013-NEXT: v_readfirstlane_b32 s4, v19
|
||||
; GFX1013-NEXT: v_readfirstlane_b32 s5, v20
|
||||
; GFX1013-NEXT: v_readfirstlane_b32 s6, v17
|
||||
|
@ -390,7 +390,7 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr
|
|||
; GFX1013-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18
|
||||
; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
||||
; GFX1013-NEXT: s_cbranch_execnz BB8_1
|
||||
; GFX1013-NEXT: s_cbranch_execnz .LBB8_1
|
||||
; GFX1013-NEXT: ; %bb.2:
|
||||
; GFX1013-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX1013-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -424,7 +424,7 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node
|
|||
; GFX1030-NEXT: v_and_or_b32 v22, v8, s0, v1
|
||||
; GFX1030-NEXT: v_lshl_or_b32 v23, v3, 16, v2
|
||||
; GFX1030-NEXT: s_mov_b32 s1, exec_lo
|
||||
; GFX1030-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1030-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1030-NEXT: v_readfirstlane_b32 s4, v11
|
||||
; GFX1030-NEXT: v_readfirstlane_b32 s5, v12
|
||||
; GFX1030-NEXT: v_readfirstlane_b32 s6, v13
|
||||
|
@ -446,7 +446,7 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node
|
|||
; GFX1030-NEXT: ; implicit-def: $vgpr23
|
||||
; GFX1030-NEXT: ; implicit-def: $vgpr11_vgpr12_vgpr13_vgpr14
|
||||
; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
||||
; GFX1030-NEXT: s_cbranch_execnz BB9_1
|
||||
; GFX1030-NEXT: s_cbranch_execnz .LBB9_1
|
||||
; GFX1030-NEXT: ; %bb.2:
|
||||
; GFX1030-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX1030-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -469,7 +469,7 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node
|
|||
; GFX1013-NEXT: v_and_or_b32 v6, v7, s0, v6
|
||||
; GFX1013-NEXT: v_and_or_b32 v7, v8, s0, v11
|
||||
; GFX1013-NEXT: v_lshl_or_b32 v8, v10, 16, v9
|
||||
; GFX1013-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1013-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1013-NEXT: v_readfirstlane_b32 s4, v16
|
||||
; GFX1013-NEXT: v_readfirstlane_b32 s5, v17
|
||||
; GFX1013-NEXT: v_readfirstlane_b32 s6, v18
|
||||
|
@ -484,7 +484,7 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node
|
|||
; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19
|
||||
; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
||||
; GFX1013-NEXT: s_cbranch_execnz BB9_1
|
||||
; GFX1013-NEXT: s_cbranch_execnz .LBB9_1
|
||||
; GFX1013-NEXT: ; %bb.2:
|
||||
; GFX1013-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX1013-NEXT: s_waitcnt vmcnt(0)
|
||||
|
|
|
@ -69,12 +69,12 @@ define amdgpu_kernel void @is_private_sgpr(i8* %ptr) {
|
|||
; CI-NEXT: s_load_dword s0, s[4:5], 0x11
|
||||
; CI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CI-NEXT: s_cmp_lg_u32 s1, s0
|
||||
; CI-NEXT: s_cbranch_scc1 BB1_2
|
||||
; CI-NEXT: s_cbranch_scc1 .LBB1_2
|
||||
; CI-NEXT: ; %bb.1: ; %bb0
|
||||
; CI-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CI-NEXT: flat_store_dword v[0:1], v0
|
||||
; CI-NEXT: s_waitcnt vmcnt(0)
|
||||
; CI-NEXT: BB1_2: ; %bb1
|
||||
; CI-NEXT: .LBB1_2: ; %bb1
|
||||
; CI-NEXT: s_endpgm
|
||||
;
|
||||
; GFX9-LABEL: is_private_sgpr:
|
||||
|
@ -84,12 +84,12 @@ define amdgpu_kernel void @is_private_sgpr(i8* %ptr) {
|
|||
; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 16
|
||||
; GFX9-NEXT: s_cmp_lg_u32 s1, s0
|
||||
; GFX9-NEXT: s_cbranch_scc1 BB1_2
|
||||
; GFX9-NEXT: s_cbranch_scc1 .LBB1_2
|
||||
; GFX9-NEXT: ; %bb.1: ; %bb0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX9-NEXT: global_store_dword v[0:1], v0, off
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: BB1_2: ; %bb1
|
||||
; GFX9-NEXT: .LBB1_2: ; %bb1
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
; GFX10-LABEL: is_private_sgpr:
|
||||
|
@ -99,12 +99,12 @@ define amdgpu_kernel void @is_private_sgpr(i8* %ptr) {
|
|||
; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
|
||||
; GFX10-NEXT: s_lshl_b32 s0, s0, 16
|
||||
; GFX10-NEXT: s_cmp_lg_u32 s1, s0
|
||||
; GFX10-NEXT: s_cbranch_scc1 BB1_2
|
||||
; GFX10-NEXT: s_cbranch_scc1 .LBB1_2
|
||||
; GFX10-NEXT: ; %bb.1: ; %bb0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX10-NEXT: global_store_dword v[0:1], v0, off
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: BB1_2: ; %bb1
|
||||
; GFX10-NEXT: .LBB1_2: ; %bb1
|
||||
; GFX10-NEXT: s_endpgm
|
||||
%val = call i1 @llvm.amdgcn.is.private(i8* %ptr)
|
||||
br i1 %val, label %bb0, label %bb1
|
||||
|
|
|
@ -69,12 +69,12 @@ define amdgpu_kernel void @is_local_sgpr(i8* %ptr) {
|
|||
; CI-NEXT: s_load_dword s0, s[4:5], 0x10
|
||||
; CI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CI-NEXT: s_cmp_lg_u32 s1, s0
|
||||
; CI-NEXT: s_cbranch_scc1 BB1_2
|
||||
; CI-NEXT: s_cbranch_scc1 .LBB1_2
|
||||
; CI-NEXT: ; %bb.1: ; %bb0
|
||||
; CI-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CI-NEXT: flat_store_dword v[0:1], v0
|
||||
; CI-NEXT: s_waitcnt vmcnt(0)
|
||||
; CI-NEXT: BB1_2: ; %bb1
|
||||
; CI-NEXT: .LBB1_2: ; %bb1
|
||||
; CI-NEXT: s_endpgm
|
||||
;
|
||||
; GFX9-LABEL: is_local_sgpr:
|
||||
|
@ -84,12 +84,12 @@ define amdgpu_kernel void @is_local_sgpr(i8* %ptr) {
|
|||
; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 16
|
||||
; GFX9-NEXT: s_cmp_lg_u32 s1, s0
|
||||
; GFX9-NEXT: s_cbranch_scc1 BB1_2
|
||||
; GFX9-NEXT: s_cbranch_scc1 .LBB1_2
|
||||
; GFX9-NEXT: ; %bb.1: ; %bb0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX9-NEXT: global_store_dword v[0:1], v0, off
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: BB1_2: ; %bb1
|
||||
; GFX9-NEXT: .LBB1_2: ; %bb1
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
; GFX10-LABEL: is_local_sgpr:
|
||||
|
@ -99,12 +99,12 @@ define amdgpu_kernel void @is_local_sgpr(i8* %ptr) {
|
|||
; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
|
||||
; GFX10-NEXT: s_lshl_b32 s0, s0, 16
|
||||
; GFX10-NEXT: s_cmp_lg_u32 s1, s0
|
||||
; GFX10-NEXT: s_cbranch_scc1 BB1_2
|
||||
; GFX10-NEXT: s_cbranch_scc1 .LBB1_2
|
||||
; GFX10-NEXT: ; %bb.1: ; %bb0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX10-NEXT: global_store_dword v[0:1], v0, off
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: BB1_2: ; %bb1
|
||||
; GFX10-NEXT: .LBB1_2: ; %bb1
|
||||
; GFX10-NEXT: s_endpgm
|
||||
%val = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
|
||||
br i1 %val, label %bb0, label %bb1
|
||||
|
|
|
@ -56,26 +56,26 @@ define amdgpu_kernel void @set_inactive_scc(i32 addrspace(1)* %out, i32 %in, <4
|
|||
; GCN-NEXT: v_mov_b32_e32 v0, 42
|
||||
; GCN-NEXT: s_not_b64 exec, exec
|
||||
; GCN-NEXT: s_cmp_lg_u32 s0, 0
|
||||
; GCN-NEXT: s_cbranch_scc0 BB2_2
|
||||
; GCN-NEXT: s_cbranch_scc0 .LBB2_2
|
||||
; GCN-NEXT: ; %bb.1: ; %.one
|
||||
; GCN-NEXT: v_add_u32_e32 v1, vcc, 1, v0
|
||||
; GCN-NEXT: s_mov_b32 s6, -1
|
||||
; GCN-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GCN-NEXT: s_mov_b32 s0, 0
|
||||
; GCN-NEXT: buffer_store_dword v1, off, s[4:7], 0
|
||||
; GCN-NEXT: s_branch BB2_3
|
||||
; GCN-NEXT: BB2_2:
|
||||
; GCN-NEXT: s_branch .LBB2_3
|
||||
; GCN-NEXT: .LBB2_2:
|
||||
; GCN-NEXT: s_mov_b32 s0, -1
|
||||
; GCN-NEXT: BB2_3: ; %Flow
|
||||
; GCN-NEXT: .LBB2_3: ; %Flow
|
||||
; GCN-NEXT: s_xor_b32 s0, s0, -1
|
||||
; GCN-NEXT: s_and_b32 s0, s0, 1
|
||||
; GCN-NEXT: s_cmp_lg_u32 s0, 0
|
||||
; GCN-NEXT: s_cbranch_scc1 BB2_5
|
||||
; GCN-NEXT: s_cbranch_scc1 .LBB2_5
|
||||
; GCN-NEXT: ; %bb.4: ; %.zero
|
||||
; GCN-NEXT: s_mov_b32 s6, -1
|
||||
; GCN-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; GCN-NEXT: BB2_5: ; %.exit
|
||||
; GCN-NEXT: .LBB2_5: ; %.exit
|
||||
; GCN-NEXT: s_endpgm
|
||||
%val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 0, i32 0)
|
||||
%cmp = icmp eq i32 %val, 56
|
||||
|
|
|
@ -9,13 +9,13 @@ define amdgpu_ps void @static_exact(float %arg0, float %arg1) {
|
|||
; SI: ; %bb.0: ; %.entry
|
||||
; SI-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
|
||||
; SI-NEXT: s_andn2_b64 exec, exec, exec
|
||||
; SI-NEXT: s_cbranch_scc0 BB0_2
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB0_2
|
||||
; SI-NEXT: ; %bb.1: ; %.entry
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
|
||||
; SI-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
; SI-NEXT: BB0_2:
|
||||
; SI-NEXT: .LBB0_2:
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: exp null off, off, off, off done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
|
@ -24,13 +24,13 @@ define amdgpu_ps void @static_exact(float %arg0, float %arg1) {
|
|||
; GFX9: ; %bb.0: ; %.entry
|
||||
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: s_andn2_b64 exec, exec, exec
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB0_2
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB0_2
|
||||
; GFX9-NEXT: ; %bb.1: ; %.entry
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
|
||||
; GFX9-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
; GFX9-NEXT: BB0_2:
|
||||
; GFX9-NEXT: .LBB0_2:
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: exp null off, off, off, off done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
|
@ -39,13 +39,13 @@ define amdgpu_ps void @static_exact(float %arg0, float %arg1) {
|
|||
; GFX10-32: ; %bb.0: ; %.entry
|
||||
; GFX10-32-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v0
|
||||
; GFX10-32-NEXT: s_andn2_b32 exec_lo, exec_lo, exec_lo
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 BB0_2
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 .LBB0_2
|
||||
; GFX10-32-NEXT: ; %bb.1: ; %.entry
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc_lo
|
||||
; GFX10-32-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
; GFX10-32-NEXT: BB0_2:
|
||||
; GFX10-32-NEXT: .LBB0_2:
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
|
@ -54,13 +54,13 @@ define amdgpu_ps void @static_exact(float %arg0, float %arg1) {
|
|||
; GFX10-64: ; %bb.0: ; %.entry
|
||||
; GFX10-64-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
|
||||
; GFX10-64-NEXT: s_andn2_b64 exec, exec, exec
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 BB0_2
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB0_2
|
||||
; GFX10-64-NEXT: ; %bb.1: ; %.entry
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
|
||||
; GFX10-64-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
; GFX10-64-NEXT: BB0_2:
|
||||
; GFX10-64-NEXT: .LBB0_2:
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
|
@ -81,13 +81,13 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
|
|||
; SI-NEXT: s_xor_b64 s[0:1], s[0:1], exec
|
||||
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
|
||||
; SI-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
|
||||
; SI-NEXT: s_cbranch_scc0 BB1_2
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB1_2
|
||||
; SI-NEXT: ; %bb.1: ; %.entry
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
|
||||
; SI-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
; SI-NEXT: BB1_2:
|
||||
; SI-NEXT: .LBB1_2:
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: exp null off, off, off, off done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
|
@ -99,13 +99,13 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
|
|||
; GFX9-NEXT: s_xor_b64 s[0:1], s[0:1], exec
|
||||
; GFX9-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
|
||||
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB1_2
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB1_2
|
||||
; GFX9-NEXT: ; %bb.1: ; %.entry
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[2:3]
|
||||
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
|
||||
; GFX9-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
; GFX9-NEXT: BB1_2:
|
||||
; GFX9-NEXT: .LBB1_2:
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: exp null off, off, off, off done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
|
@ -117,13 +117,13 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
|
|||
; GFX10-32-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v0
|
||||
; GFX10-32-NEXT: s_xor_b32 s0, s0, exec_lo
|
||||
; GFX10-32-NEXT: s_andn2_b32 s1, s1, s0
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 BB1_2
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 .LBB1_2
|
||||
; GFX10-32-NEXT: ; %bb.1: ; %.entry
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s1
|
||||
; GFX10-32-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc_lo
|
||||
; GFX10-32-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
; GFX10-32-NEXT: BB1_2:
|
||||
; GFX10-32-NEXT: .LBB1_2:
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
|
@ -135,13 +135,13 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
|
|||
; GFX10-64-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
|
||||
; GFX10-64-NEXT: s_xor_b64 s[0:1], s[0:1], exec
|
||||
; GFX10-64-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 BB1_2
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB1_2
|
||||
; GFX10-64-NEXT: ; %bb.1: ; %.entry
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[2:3]
|
||||
; GFX10-64-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
|
||||
; GFX10-64-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
; GFX10-64-NEXT: BB1_2:
|
||||
; GFX10-64-NEXT: .LBB1_2:
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
|
@ -166,18 +166,18 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
|
|||
; SI-NEXT: s_xor_b64 s[2:3], vcc, -1
|
||||
; SI-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
|
||||
; SI-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
|
||||
; SI-NEXT: s_cbranch_execz BB2_3
|
||||
; SI-NEXT: s_cbranch_execz .LBB2_3
|
||||
; SI-NEXT: ; %bb.1: ; %.demote
|
||||
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; SI-NEXT: s_cbranch_scc0 BB2_4
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB2_4
|
||||
; SI-NEXT: ; %bb.2: ; %.demote
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: BB2_3: ; %.continue
|
||||
; SI-NEXT: .LBB2_3: ; %.continue
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
|
||||
; SI-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
; SI-NEXT: BB2_4:
|
||||
; SI-NEXT: .LBB2_4:
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: exp null off, off, off, off done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
|
@ -193,18 +193,18 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
|
|||
; GFX9-NEXT: s_xor_b64 s[2:3], vcc, -1
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
|
||||
; GFX9-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_execz BB2_3
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB2_3
|
||||
; GFX9-NEXT: ; %bb.1: ; %.demote
|
||||
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB2_4
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB2_4
|
||||
; GFX9-NEXT: ; %bb.2: ; %.demote
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: BB2_3: ; %.continue
|
||||
; GFX9-NEXT: .LBB2_3: ; %.continue
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
|
||||
; GFX9-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
; GFX9-NEXT: BB2_4:
|
||||
; GFX9-NEXT: .LBB2_4:
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: exp null off, off, off, off done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
|
@ -220,18 +220,18 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
|
|||
; GFX10-32-NEXT: s_xor_b32 s1, vcc_lo, -1
|
||||
; GFX10-32-NEXT: s_and_saveexec_b32 s2, s1
|
||||
; GFX10-32-NEXT: s_xor_b32 s1, exec_lo, s2
|
||||
; GFX10-32-NEXT: s_cbranch_execz BB2_3
|
||||
; GFX10-32-NEXT: s_cbranch_execz .LBB2_3
|
||||
; GFX10-32-NEXT: ; %bb.1: ; %.demote
|
||||
; GFX10-32-NEXT: s_andn2_b32 s0, s0, exec_lo
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 BB2_4
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 .LBB2_4
|
||||
; GFX10-32-NEXT: ; %bb.2: ; %.demote
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: BB2_3: ; %.continue
|
||||
; GFX10-32-NEXT: .LBB2_3: ; %.continue
|
||||
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s1
|
||||
; GFX10-32-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc_lo
|
||||
; GFX10-32-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
; GFX10-32-NEXT: BB2_4:
|
||||
; GFX10-32-NEXT: .LBB2_4:
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
|
@ -247,18 +247,18 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
|
|||
; GFX10-64-NEXT: s_xor_b64 s[2:3], vcc, -1
|
||||
; GFX10-64-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
|
||||
; GFX10-64-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
|
||||
; GFX10-64-NEXT: s_cbranch_execz BB2_3
|
||||
; GFX10-64-NEXT: s_cbranch_execz .LBB2_3
|
||||
; GFX10-64-NEXT: ; %bb.1: ; %.demote
|
||||
; GFX10-64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 BB2_4
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB2_4
|
||||
; GFX10-64-NEXT: ; %bb.2: ; %.demote
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: BB2_3: ; %.continue
|
||||
; GFX10-64-NEXT: .LBB2_3: ; %.continue
|
||||
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX10-64-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
|
||||
; GFX10-64-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
; GFX10-64-NEXT: BB2_4:
|
||||
; GFX10-64-NEXT: .LBB2_4:
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
|
@ -288,14 +288,14 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
|
||||
; SI-NEXT: s_and_saveexec_b64 s[14:15], vcc
|
||||
; SI-NEXT: s_xor_b64 s[14:15], exec, s[14:15]
|
||||
; SI-NEXT: s_cbranch_execz BB3_3
|
||||
; SI-NEXT: s_cbranch_execz .LBB3_3
|
||||
; SI-NEXT: ; %bb.1: ; %.demote
|
||||
; SI-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
|
||||
; SI-NEXT: s_cbranch_scc0 BB3_4
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB3_4
|
||||
; SI-NEXT: ; %bb.2: ; %.demote
|
||||
; SI-NEXT: s_wqm_b64 s[16:17], s[12:13]
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[16:17]
|
||||
; SI-NEXT: BB3_3: ; %.continue
|
||||
; SI-NEXT: .LBB3_3: ; %.continue
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[14:15]
|
||||
; SI-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -303,12 +303,12 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; SI-NEXT: s_and_b64 exec, exec, s[12:13]
|
||||
; SI-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: s_branch BB3_5
|
||||
; SI-NEXT: BB3_4:
|
||||
; SI-NEXT: s_branch .LBB3_5
|
||||
; SI-NEXT: .LBB3_4:
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: exp null off, off, off, off done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
; SI-NEXT: BB3_5:
|
||||
; SI-NEXT: .LBB3_5:
|
||||
;
|
||||
; GFX9-LABEL: wqm_demote_1:
|
||||
; GFX9: ; %bb.0: ; %.entry
|
||||
|
@ -317,14 +317,14 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[14:15], vcc
|
||||
; GFX9-NEXT: s_xor_b64 s[14:15], exec, s[14:15]
|
||||
; GFX9-NEXT: s_cbranch_execz BB3_3
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB3_3
|
||||
; GFX9-NEXT: ; %bb.1: ; %.demote
|
||||
; GFX9-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB3_4
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB3_4
|
||||
; GFX9-NEXT: ; %bb.2: ; %.demote
|
||||
; GFX9-NEXT: s_wqm_b64 s[16:17], s[12:13]
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[16:17]
|
||||
; GFX9-NEXT: BB3_3: ; %.continue
|
||||
; GFX9-NEXT: .LBB3_3: ; %.continue
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[14:15]
|
||||
; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -332,12 +332,12 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
|
||||
; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_branch BB3_5
|
||||
; GFX9-NEXT: BB3_4:
|
||||
; GFX9-NEXT: s_branch .LBB3_5
|
||||
; GFX9-NEXT: .LBB3_4:
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: exp null off, off, off, off done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
; GFX9-NEXT: BB3_5:
|
||||
; GFX9-NEXT: .LBB3_5:
|
||||
;
|
||||
; GFX10-32-LABEL: wqm_demote_1:
|
||||
; GFX10-32: ; %bb.0: ; %.entry
|
||||
|
@ -346,14 +346,14 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; GFX10-32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v1
|
||||
; GFX10-32-NEXT: s_and_saveexec_b32 s13, vcc_lo
|
||||
; GFX10-32-NEXT: s_xor_b32 s13, exec_lo, s13
|
||||
; GFX10-32-NEXT: s_cbranch_execz BB3_3
|
||||
; GFX10-32-NEXT: s_cbranch_execz .LBB3_3
|
||||
; GFX10-32-NEXT: ; %bb.1: ; %.demote
|
||||
; GFX10-32-NEXT: s_andn2_b32 s12, s12, exec_lo
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 BB3_4
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 .LBB3_4
|
||||
; GFX10-32-NEXT: ; %bb.2: ; %.demote
|
||||
; GFX10-32-NEXT: s_wqm_b32 s14, s12
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s14
|
||||
; GFX10-32-NEXT: BB3_3: ; %.continue
|
||||
; GFX10-32-NEXT: .LBB3_3: ; %.continue
|
||||
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s13
|
||||
; GFX10-32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
||||
; GFX10-32-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -361,12 +361,12 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
||||
; GFX10-32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
||||
; GFX10-32-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-32-NEXT: s_branch BB3_5
|
||||
; GFX10-32-NEXT: BB3_4:
|
||||
; GFX10-32-NEXT: s_branch .LBB3_5
|
||||
; GFX10-32-NEXT: .LBB3_4:
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
; GFX10-32-NEXT: BB3_5:
|
||||
; GFX10-32-NEXT: .LBB3_5:
|
||||
;
|
||||
; GFX10-64-LABEL: wqm_demote_1:
|
||||
; GFX10-64: ; %bb.0: ; %.entry
|
||||
|
@ -375,14 +375,14 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; GFX10-64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
|
||||
; GFX10-64-NEXT: s_and_saveexec_b64 s[14:15], vcc
|
||||
; GFX10-64-NEXT: s_xor_b64 s[14:15], exec, s[14:15]
|
||||
; GFX10-64-NEXT: s_cbranch_execz BB3_3
|
||||
; GFX10-64-NEXT: s_cbranch_execz .LBB3_3
|
||||
; GFX10-64-NEXT: ; %bb.1: ; %.demote
|
||||
; GFX10-64-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 BB3_4
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB3_4
|
||||
; GFX10-64-NEXT: ; %bb.2: ; %.demote
|
||||
; GFX10-64-NEXT: s_wqm_b64 s[16:17], s[12:13]
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[16:17]
|
||||
; GFX10-64-NEXT: BB3_3: ; %.continue
|
||||
; GFX10-64-NEXT: .LBB3_3: ; %.continue
|
||||
; GFX10-64-NEXT: s_or_b64 exec, exec, s[14:15]
|
||||
; GFX10-64-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
||||
; GFX10-64-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -390,12 +390,12 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[12:13]
|
||||
; GFX10-64-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
||||
; GFX10-64-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-64-NEXT: s_branch BB3_5
|
||||
; GFX10-64-NEXT: BB3_4:
|
||||
; GFX10-64-NEXT: s_branch .LBB3_5
|
||||
; GFX10-64-NEXT: .LBB3_4:
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
; GFX10-64-NEXT: BB3_5:
|
||||
; GFX10-64-NEXT: .LBB3_5:
|
||||
.entry:
|
||||
%z.cmp = fcmp olt float %z, 0.0
|
||||
br i1 %z.cmp, label %.continue, label %.demote
|
||||
|
@ -424,25 +424,25 @@ define amdgpu_ps <4 x float> @wqm_demote_2(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
||||
; SI-NEXT: s_and_saveexec_b64 s[14:15], vcc
|
||||
; SI-NEXT: s_xor_b64 s[14:15], exec, s[14:15]
|
||||
; SI-NEXT: s_cbranch_execz BB4_3
|
||||
; SI-NEXT: s_cbranch_execz .LBB4_3
|
||||
; SI-NEXT: ; %bb.1: ; %.demote
|
||||
; SI-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
|
||||
; SI-NEXT: s_cbranch_scc0 BB4_4
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB4_4
|
||||
; SI-NEXT: ; %bb.2: ; %.demote
|
||||
; SI-NEXT: s_wqm_b64 s[16:17], s[12:13]
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[16:17]
|
||||
; SI-NEXT: BB4_3: ; %.continue
|
||||
; SI-NEXT: .LBB4_3: ; %.continue
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[14:15]
|
||||
; SI-NEXT: v_add_f32_e32 v0, v0, v0
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[12:13]
|
||||
; SI-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: s_branch BB4_5
|
||||
; SI-NEXT: BB4_4:
|
||||
; SI-NEXT: s_branch .LBB4_5
|
||||
; SI-NEXT: .LBB4_4:
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: exp null off, off, off, off done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
; SI-NEXT: BB4_5:
|
||||
; SI-NEXT: .LBB4_5:
|
||||
;
|
||||
; GFX9-LABEL: wqm_demote_2:
|
||||
; GFX9: ; %bb.0: ; %.entry
|
||||
|
@ -453,25 +453,25 @@ define amdgpu_ps <4 x float> @wqm_demote_2(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[14:15], vcc
|
||||
; GFX9-NEXT: s_xor_b64 s[14:15], exec, s[14:15]
|
||||
; GFX9-NEXT: s_cbranch_execz BB4_3
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB4_3
|
||||
; GFX9-NEXT: ; %bb.1: ; %.demote
|
||||
; GFX9-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB4_4
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB4_4
|
||||
; GFX9-NEXT: ; %bb.2: ; %.demote
|
||||
; GFX9-NEXT: s_wqm_b64 s[16:17], s[12:13]
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[16:17]
|
||||
; GFX9-NEXT: BB4_3: ; %.continue
|
||||
; GFX9-NEXT: .LBB4_3: ; %.continue
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[14:15]
|
||||
; GFX9-NEXT: v_add_f32_e32 v0, v0, v0
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
|
||||
; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_branch BB4_5
|
||||
; GFX9-NEXT: BB4_4:
|
||||
; GFX9-NEXT: s_branch .LBB4_5
|
||||
; GFX9-NEXT: .LBB4_4:
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: exp null off, off, off, off done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
; GFX9-NEXT: BB4_5:
|
||||
; GFX9-NEXT: .LBB4_5:
|
||||
;
|
||||
; GFX10-32-LABEL: wqm_demote_2:
|
||||
; GFX10-32: ; %bb.0: ; %.entry
|
||||
|
@ -482,25 +482,25 @@ define amdgpu_ps <4 x float> @wqm_demote_2(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; GFX10-32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
|
||||
; GFX10-32-NEXT: s_and_saveexec_b32 s13, vcc_lo
|
||||
; GFX10-32-NEXT: s_xor_b32 s13, exec_lo, s13
|
||||
; GFX10-32-NEXT: s_cbranch_execz BB4_3
|
||||
; GFX10-32-NEXT: s_cbranch_execz .LBB4_3
|
||||
; GFX10-32-NEXT: ; %bb.1: ; %.demote
|
||||
; GFX10-32-NEXT: s_andn2_b32 s12, s12, exec_lo
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 BB4_4
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 .LBB4_4
|
||||
; GFX10-32-NEXT: ; %bb.2: ; %.demote
|
||||
; GFX10-32-NEXT: s_wqm_b32 s14, s12
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s14
|
||||
; GFX10-32-NEXT: BB4_3: ; %.continue
|
||||
; GFX10-32-NEXT: .LBB4_3: ; %.continue
|
||||
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s13
|
||||
; GFX10-32-NEXT: v_add_f32_e32 v0, v0, v0
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
||||
; GFX10-32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
||||
; GFX10-32-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-32-NEXT: s_branch BB4_5
|
||||
; GFX10-32-NEXT: BB4_4:
|
||||
; GFX10-32-NEXT: s_branch .LBB4_5
|
||||
; GFX10-32-NEXT: .LBB4_4:
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
; GFX10-32-NEXT: BB4_5:
|
||||
; GFX10-32-NEXT: .LBB4_5:
|
||||
;
|
||||
; GFX10-64-LABEL: wqm_demote_2:
|
||||
; GFX10-64: ; %bb.0: ; %.entry
|
||||
|
@ -511,25 +511,25 @@ define amdgpu_ps <4 x float> @wqm_demote_2(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; GFX10-64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
||||
; GFX10-64-NEXT: s_and_saveexec_b64 s[14:15], vcc
|
||||
; GFX10-64-NEXT: s_xor_b64 s[14:15], exec, s[14:15]
|
||||
; GFX10-64-NEXT: s_cbranch_execz BB4_3
|
||||
; GFX10-64-NEXT: s_cbranch_execz .LBB4_3
|
||||
; GFX10-64-NEXT: ; %bb.1: ; %.demote
|
||||
; GFX10-64-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 BB4_4
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB4_4
|
||||
; GFX10-64-NEXT: ; %bb.2: ; %.demote
|
||||
; GFX10-64-NEXT: s_wqm_b64 s[16:17], s[12:13]
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[16:17]
|
||||
; GFX10-64-NEXT: BB4_3: ; %.continue
|
||||
; GFX10-64-NEXT: .LBB4_3: ; %.continue
|
||||
; GFX10-64-NEXT: s_or_b64 exec, exec, s[14:15]
|
||||
; GFX10-64-NEXT: v_add_f32_e32 v0, v0, v0
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[12:13]
|
||||
; GFX10-64-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
||||
; GFX10-64-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-64-NEXT: s_branch BB4_5
|
||||
; GFX10-64-NEXT: BB4_4:
|
||||
; GFX10-64-NEXT: s_branch .LBB4_5
|
||||
; GFX10-64-NEXT: .LBB4_4:
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
; GFX10-64-NEXT: BB4_5:
|
||||
; GFX10-64-NEXT: .LBB4_5:
|
||||
.entry:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
|
@ -558,7 +558,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
|
|||
; SI-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
|
||||
; SI-NEXT: s_xor_b64 s[14:15], vcc, exec
|
||||
; SI-NEXT: s_andn2_b64 s[12:13], s[12:13], s[14:15]
|
||||
; SI-NEXT: s_cbranch_scc0 BB5_2
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB5_2
|
||||
; SI-NEXT: ; %bb.1: ; %.entry
|
||||
; SI-NEXT: s_wqm_b64 s[14:15], s[12:13]
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[14:15]
|
||||
|
@ -566,12 +566,12 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
|
|||
; SI-NEXT: s_and_b64 exec, exec, s[12:13]
|
||||
; SI-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: s_branch BB5_3
|
||||
; SI-NEXT: BB5_2:
|
||||
; SI-NEXT: s_branch .LBB5_3
|
||||
; SI-NEXT: .LBB5_2:
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: exp null off, off, off, off done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
; SI-NEXT: BB5_3:
|
||||
; SI-NEXT: .LBB5_3:
|
||||
;
|
||||
; GFX9-LABEL: wqm_demote_dynamic:
|
||||
; GFX9: ; %bb.0: ; %.entry
|
||||
|
@ -582,7 +582,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
|
|||
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: s_xor_b64 s[14:15], vcc, exec
|
||||
; GFX9-NEXT: s_andn2_b64 s[12:13], s[12:13], s[14:15]
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB5_2
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB5_2
|
||||
; GFX9-NEXT: ; %bb.1: ; %.entry
|
||||
; GFX9-NEXT: s_wqm_b64 s[14:15], s[12:13]
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
|
||||
|
@ -590,12 +590,12 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
|
|||
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
|
||||
; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_branch BB5_3
|
||||
; GFX9-NEXT: BB5_2:
|
||||
; GFX9-NEXT: s_branch .LBB5_3
|
||||
; GFX9-NEXT: .LBB5_2:
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: exp null off, off, off, off done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
; GFX9-NEXT: BB5_3:
|
||||
; GFX9-NEXT: .LBB5_3:
|
||||
;
|
||||
; GFX10-32-LABEL: wqm_demote_dynamic:
|
||||
; GFX10-32: ; %bb.0: ; %.entry
|
||||
|
@ -606,7 +606,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
|
|||
; GFX10-32-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v0
|
||||
; GFX10-32-NEXT: s_xor_b32 s13, vcc_lo, exec_lo
|
||||
; GFX10-32-NEXT: s_andn2_b32 s12, s12, s13
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 BB5_2
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 .LBB5_2
|
||||
; GFX10-32-NEXT: ; %bb.1: ; %.entry
|
||||
; GFX10-32-NEXT: s_wqm_b32 s13, s12
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s13
|
||||
|
@ -614,12 +614,12 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
|
|||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
||||
; GFX10-32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
||||
; GFX10-32-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-32-NEXT: s_branch BB5_3
|
||||
; GFX10-32-NEXT: BB5_2:
|
||||
; GFX10-32-NEXT: s_branch .LBB5_3
|
||||
; GFX10-32-NEXT: .LBB5_2:
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
; GFX10-32-NEXT: BB5_3:
|
||||
; GFX10-32-NEXT: .LBB5_3:
|
||||
;
|
||||
; GFX10-64-LABEL: wqm_demote_dynamic:
|
||||
; GFX10-64: ; %bb.0: ; %.entry
|
||||
|
@ -630,7 +630,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
|
|||
; GFX10-64-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
|
||||
; GFX10-64-NEXT: s_xor_b64 s[14:15], vcc, exec
|
||||
; GFX10-64-NEXT: s_andn2_b64 s[12:13], s[12:13], s[14:15]
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 BB5_2
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB5_2
|
||||
; GFX10-64-NEXT: ; %bb.1: ; %.entry
|
||||
; GFX10-64-NEXT: s_wqm_b64 s[14:15], s[12:13]
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[14:15]
|
||||
|
@ -638,12 +638,12 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
|
|||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[12:13]
|
||||
; GFX10-64-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
||||
; GFX10-64-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-64-NEXT: s_branch BB5_3
|
||||
; GFX10-64-NEXT: BB5_2:
|
||||
; GFX10-64-NEXT: s_branch .LBB5_3
|
||||
; GFX10-64-NEXT: .LBB5_2:
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
; GFX10-64-NEXT: BB5_3:
|
||||
; GFX10-64-NEXT: .LBB5_3:
|
||||
.entry:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
|
@ -672,14 +672,14 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
|
|||
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||
; SI-NEXT: s_cbranch_execz BB6_3
|
||||
; SI-NEXT: s_cbranch_execz .LBB6_3
|
||||
; SI-NEXT: ; %bb.1: ; %.demote0
|
||||
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; SI-NEXT: s_cbranch_scc0 BB6_7
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB6_7
|
||||
; SI-NEXT: ; %bb.2: ; %.demote0
|
||||
; SI-NEXT: s_wqm_b64 s[6:7], s[0:1]
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[6:7]
|
||||
; SI-NEXT: BB6_3: ; %.continue0
|
||||
; SI-NEXT: .LBB6_3: ; %.continue0
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: s_mov_b64 s[2:3], s[0:1]
|
||||
; SI-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[2:3]
|
||||
|
@ -695,19 +695,19 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
|
|||
; SI-NEXT: s_xor_b64 s[2:3], s[2:3], -1
|
||||
; SI-NEXT: s_and_saveexec_b64 s[6:7], s[2:3]
|
||||
; SI-NEXT: s_xor_b64 s[2:3], exec, s[6:7]
|
||||
; SI-NEXT: s_cbranch_execz BB6_6
|
||||
; SI-NEXT: s_cbranch_execz .LBB6_6
|
||||
; SI-NEXT: ; %bb.4: ; %.demote1
|
||||
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; SI-NEXT: s_cbranch_scc0 BB6_7
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB6_7
|
||||
; SI-NEXT: ; %bb.5: ; %.demote1
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: BB6_6: ; %.continue1
|
||||
; SI-NEXT: .LBB6_6: ; %.continue1
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: v_mov_b32_e32 v0, s4
|
||||
; SI-NEXT: v_mov_b32_e32 v1, s5
|
||||
; SI-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
|
||||
; SI-NEXT: s_endpgm
|
||||
; SI-NEXT: BB6_7:
|
||||
; SI-NEXT: .LBB6_7:
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: exp null off, off, off, off done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
|
@ -720,14 +720,14 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
|
|||
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX9-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||
; GFX9-NEXT: s_cbranch_execz BB6_3
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB6_3
|
||||
; GFX9-NEXT: ; %bb.1: ; %.demote0
|
||||
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB6_7
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB6_7
|
||||
; GFX9-NEXT: ; %bb.2: ; %.demote0
|
||||
; GFX9-NEXT: s_wqm_b64 s[4:5], s[0:1]
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: BB6_3: ; %.continue0
|
||||
; GFX9-NEXT: .LBB6_3: ; %.continue0
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[0:1]
|
||||
; GFX9-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[2:3]
|
||||
|
@ -743,19 +743,19 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
|
|||
; GFX9-NEXT: s_xor_b64 s[2:3], s[2:3], -1
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
|
||||
; GFX9-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_execz BB6_6
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB6_6
|
||||
; GFX9-NEXT: ; %bb.4: ; %.demote1
|
||||
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB6_7
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB6_7
|
||||
; GFX9-NEXT: ; %bb.5: ; %.demote1
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: BB6_6: ; %.continue1
|
||||
; GFX9-NEXT: .LBB6_6: ; %.continue1
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3c00
|
||||
; GFX9-NEXT: v_bfrev_b32_e32 v1, 60
|
||||
; GFX9-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
; GFX9-NEXT: BB6_7:
|
||||
; GFX9-NEXT: .LBB6_7:
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: exp null off, off, off, off done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
|
@ -768,14 +768,14 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
|
|||
; GFX10-32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
|
||||
; GFX10-32-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX10-32-NEXT: s_xor_b32 s1, exec_lo, s1
|
||||
; GFX10-32-NEXT: s_cbranch_execz BB6_3
|
||||
; GFX10-32-NEXT: s_cbranch_execz .LBB6_3
|
||||
; GFX10-32-NEXT: ; %bb.1: ; %.demote0
|
||||
; GFX10-32-NEXT: s_andn2_b32 s0, s0, exec_lo
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 BB6_7
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 .LBB6_7
|
||||
; GFX10-32-NEXT: ; %bb.2: ; %.demote0
|
||||
; GFX10-32-NEXT: s_wqm_b32 s2, s0
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-32-NEXT: BB6_3: ; %.continue0
|
||||
; GFX10-32-NEXT: .LBB6_3: ; %.continue0
|
||||
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s1
|
||||
; GFX10-32-NEXT: s_mov_b32 s1, s0
|
||||
; GFX10-32-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s1
|
||||
|
@ -789,19 +789,19 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
|
|||
; GFX10-32-NEXT: s_xor_b32 s1, s1, -1
|
||||
; GFX10-32-NEXT: s_and_saveexec_b32 s2, s1
|
||||
; GFX10-32-NEXT: s_xor_b32 s1, exec_lo, s2
|
||||
; GFX10-32-NEXT: s_cbranch_execz BB6_6
|
||||
; GFX10-32-NEXT: s_cbranch_execz .LBB6_6
|
||||
; GFX10-32-NEXT: ; %bb.4: ; %.demote1
|
||||
; GFX10-32-NEXT: s_andn2_b32 s0, s0, exec_lo
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 BB6_7
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 .LBB6_7
|
||||
; GFX10-32-NEXT: ; %bb.5: ; %.demote1
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: BB6_6: ; %.continue1
|
||||
; GFX10-32-NEXT: .LBB6_6: ; %.continue1
|
||||
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s1
|
||||
; GFX10-32-NEXT: v_mov_b32_e32 v0, 0x3c00
|
||||
; GFX10-32-NEXT: v_bfrev_b32_e32 v1, 60
|
||||
; GFX10-32-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
; GFX10-32-NEXT: BB6_7:
|
||||
; GFX10-32-NEXT: .LBB6_7:
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
|
@ -814,14 +814,14 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
|
|||
; GFX10-64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; GFX10-64-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX10-64-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||
; GFX10-64-NEXT: s_cbranch_execz BB6_3
|
||||
; GFX10-64-NEXT: s_cbranch_execz .LBB6_3
|
||||
; GFX10-64-NEXT: ; %bb.1: ; %.demote0
|
||||
; GFX10-64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 BB6_7
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB6_7
|
||||
; GFX10-64-NEXT: ; %bb.2: ; %.demote0
|
||||
; GFX10-64-NEXT: s_wqm_b64 s[4:5], s[0:1]
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[4:5]
|
||||
; GFX10-64-NEXT: BB6_3: ; %.continue0
|
||||
; GFX10-64-NEXT: .LBB6_3: ; %.continue0
|
||||
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX10-64-NEXT: s_mov_b64 s[2:3], s[0:1]
|
||||
; GFX10-64-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[2:3]
|
||||
|
@ -835,19 +835,19 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
|
|||
; GFX10-64-NEXT: s_xor_b64 s[2:3], s[2:3], -1
|
||||
; GFX10-64-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
|
||||
; GFX10-64-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
|
||||
; GFX10-64-NEXT: s_cbranch_execz BB6_6
|
||||
; GFX10-64-NEXT: s_cbranch_execz .LBB6_6
|
||||
; GFX10-64-NEXT: ; %bb.4: ; %.demote1
|
||||
; GFX10-64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 BB6_7
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB6_7
|
||||
; GFX10-64-NEXT: ; %bb.5: ; %.demote1
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: BB6_6: ; %.continue1
|
||||
; GFX10-64-NEXT: .LBB6_6: ; %.continue1
|
||||
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX10-64-NEXT: v_mov_b32_e32 v0, 0x3c00
|
||||
; GFX10-64-NEXT: v_bfrev_b32_e32 v1, 60
|
||||
; GFX10-64-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
; GFX10-64-NEXT: BB6_7:
|
||||
; GFX10-64-NEXT: .LBB6_7:
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
|
@ -903,27 +903,27 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
|||
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||
; SI-NEXT: s_cbranch_execz BB7_3
|
||||
; SI-NEXT: s_cbranch_execz .LBB7_3
|
||||
; SI-NEXT: ; %bb.1: ; %.demote0
|
||||
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; SI-NEXT: s_cbranch_scc0 BB7_9
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; SI-NEXT: ; %bb.2: ; %.demote0
|
||||
; SI-NEXT: s_wqm_b64 s[8:9], s[0:1]
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[8:9]
|
||||
; SI-NEXT: BB7_3: ; %.continue0.preheader
|
||||
; SI-NEXT: .LBB7_3: ; %.continue0.preheader
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: s_mov_b64 s[2:3], 0
|
||||
; SI-NEXT: v_mov_b32_e32 v0, s4
|
||||
; SI-NEXT: s_branch BB7_5
|
||||
; SI-NEXT: BB7_4: ; %.continue1
|
||||
; SI-NEXT: s_branch .LBB7_5
|
||||
; SI-NEXT: .LBB7_4: ; %.continue1
|
||||
; SI-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; SI-NEXT: v_add_u32_e32 v0, vcc, 1, v0
|
||||
; SI-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
|
||||
; SI-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; SI-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: s_cbranch_execz BB7_8
|
||||
; SI-NEXT: BB7_5: ; %.continue0
|
||||
; SI-NEXT: s_cbranch_execz .LBB7_8
|
||||
; SI-NEXT: .LBB7_5: ; %.continue0
|
||||
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; SI-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; SI-NEXT: v_cndmask_b32_e64 v2, v0, 0, s[4:5]
|
||||
|
@ -938,24 +938,24 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
|||
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], -1
|
||||
; SI-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
|
||||
; SI-NEXT: s_xor_b64 s[4:5], exec, s[8:9]
|
||||
; SI-NEXT: s_cbranch_execz BB7_4
|
||||
; SI-NEXT: s_cbranch_execz .LBB7_4
|
||||
; SI-NEXT: ; %bb.6: ; %.demote1
|
||||
; SI-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; SI-NEXT: s_cbranch_scc0 BB7_9
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; SI-NEXT: ; %bb.7: ; %.demote1
|
||||
; SI-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; SI-NEXT: s_wqm_b64 s[8:9], s[0:1]
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[8:9]
|
||||
; SI-NEXT: s_branch BB7_4
|
||||
; SI-NEXT: BB7_8: ; %.return
|
||||
; SI-NEXT: s_branch .LBB7_4
|
||||
; SI-NEXT: .LBB7_8: ; %.return
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[0:1]
|
||||
; SI-NEXT: v_mov_b32_e32 v0, s6
|
||||
; SI-NEXT: v_mov_b32_e32 v1, s7
|
||||
; SI-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
|
||||
; SI-NEXT: s_endpgm
|
||||
; SI-NEXT: BB7_9:
|
||||
; SI-NEXT: .LBB7_9:
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: exp null off, off, off, off done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
|
@ -969,27 +969,27 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
|||
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX9-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||
; GFX9-NEXT: s_cbranch_execz BB7_3
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB7_3
|
||||
; GFX9-NEXT: ; %bb.1: ; %.demote0
|
||||
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB7_9
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; GFX9-NEXT: ; %bb.2: ; %.demote0
|
||||
; GFX9-NEXT: s_wqm_b64 s[6:7], s[0:1]
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[6:7]
|
||||
; GFX9-NEXT: BB7_3: ; %.continue0.preheader
|
||||
; GFX9-NEXT: .LBB7_3: ; %.continue0.preheader
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], 0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX9-NEXT: s_branch BB7_5
|
||||
; GFX9-NEXT: BB7_4: ; %.continue1
|
||||
; GFX9-NEXT: s_branch .LBB7_5
|
||||
; GFX9-NEXT: .LBB7_4: ; %.continue1
|
||||
; GFX9-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: v_add_u32_e32 v0, 1, v0
|
||||
; GFX9-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
|
||||
; GFX9-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX9-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX9-NEXT: s_cbranch_execz BB7_8
|
||||
; GFX9-NEXT: BB7_5: ; %.continue0
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB7_8
|
||||
; GFX9-NEXT: .LBB7_5: ; %.continue0
|
||||
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX9-NEXT: v_cndmask_b32_e64 v2, v0, 0, s[4:5]
|
||||
|
@ -1004,24 +1004,24 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
|||
; GFX9-NEXT: s_xor_b64 s[4:5], s[4:5], -1
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
|
||||
; GFX9-NEXT: s_cbranch_execz BB7_4
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB7_4
|
||||
; GFX9-NEXT: ; %bb.6: ; %.demote1
|
||||
; GFX9-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB7_9
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; GFX9-NEXT: ; %bb.7: ; %.demote1
|
||||
; GFX9-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX9-NEXT: s_wqm_b64 s[6:7], s[0:1]
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[6:7]
|
||||
; GFX9-NEXT: s_branch BB7_4
|
||||
; GFX9-NEXT: BB7_8: ; %.return
|
||||
; GFX9-NEXT: s_branch .LBB7_4
|
||||
; GFX9-NEXT: .LBB7_8: ; %.return
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[0:1]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3c00
|
||||
; GFX9-NEXT: v_bfrev_b32_e32 v1, 60
|
||||
; GFX9-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
; GFX9-NEXT: BB7_9:
|
||||
; GFX9-NEXT: .LBB7_9:
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: exp null off, off, off, off done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
|
@ -1035,26 +1035,26 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
|||
; GFX10-32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
|
||||
; GFX10-32-NEXT: s_and_saveexec_b32 s2, vcc_lo
|
||||
; GFX10-32-NEXT: s_xor_b32 s2, exec_lo, s2
|
||||
; GFX10-32-NEXT: s_cbranch_execz BB7_3
|
||||
; GFX10-32-NEXT: s_cbranch_execz .LBB7_3
|
||||
; GFX10-32-NEXT: ; %bb.1: ; %.demote0
|
||||
; GFX10-32-NEXT: s_andn2_b32 s0, s0, exec_lo
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 BB7_9
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; GFX10-32-NEXT: ; %bb.2: ; %.demote0
|
||||
; GFX10-32-NEXT: s_wqm_b32 s3, s0
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s3
|
||||
; GFX10-32-NEXT: BB7_3: ; %.continue0.preheader
|
||||
; GFX10-32-NEXT: .LBB7_3: ; %.continue0.preheader
|
||||
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-32-NEXT: v_mov_b32_e32 v0, s1
|
||||
; GFX10-32-NEXT: s_branch BB7_5
|
||||
; GFX10-32-NEXT: BB7_4: ; %.continue1
|
||||
; GFX10-32-NEXT: s_branch .LBB7_5
|
||||
; GFX10-32-NEXT: .LBB7_4: ; %.continue1
|
||||
; GFX10-32-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-32-NEXT: v_add_nc_u32_e32 v0, 1, v0
|
||||
; GFX10-32-NEXT: v_cmp_ge_i32_e32 vcc_lo, v0, v1
|
||||
; GFX10-32-NEXT: s_or_b32 s1, vcc_lo, s1
|
||||
; GFX10-32-NEXT: s_andn2_b32 exec_lo, exec_lo, s1
|
||||
; GFX10-32-NEXT: s_cbranch_execz BB7_8
|
||||
; GFX10-32-NEXT: BB7_5: ; %.continue0
|
||||
; GFX10-32-NEXT: s_cbranch_execz .LBB7_8
|
||||
; GFX10-32-NEXT: .LBB7_5: ; %.continue0
|
||||
; GFX10-32-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-32-NEXT: s_mov_b32 s2, s0
|
||||
; GFX10-32-NEXT: v_cndmask_b32_e64 v2, v0, 0, s2
|
||||
|
@ -1067,24 +1067,24 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
|||
; GFX10-32-NEXT: s_xor_b32 s2, s2, -1
|
||||
; GFX10-32-NEXT: s_and_saveexec_b32 s3, s2
|
||||
; GFX10-32-NEXT: s_xor_b32 s2, exec_lo, s3
|
||||
; GFX10-32-NEXT: s_cbranch_execz BB7_4
|
||||
; GFX10-32-NEXT: s_cbranch_execz .LBB7_4
|
||||
; GFX10-32-NEXT: ; %bb.6: ; %.demote1
|
||||
; GFX10-32-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX10-32-NEXT: s_andn2_b32 s0, s0, exec_lo
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 BB7_9
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; GFX10-32-NEXT: ; %bb.7: ; %.demote1
|
||||
; GFX10-32-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX10-32-NEXT: s_wqm_b32 s3, s0
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s3
|
||||
; GFX10-32-NEXT: s_branch BB7_4
|
||||
; GFX10-32-NEXT: BB7_8: ; %.return
|
||||
; GFX10-32-NEXT: s_branch .LBB7_4
|
||||
; GFX10-32-NEXT: .LBB7_8: ; %.return
|
||||
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s1
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s0
|
||||
; GFX10-32-NEXT: v_mov_b32_e32 v0, 0x3c00
|
||||
; GFX10-32-NEXT: v_bfrev_b32_e32 v1, 60
|
||||
; GFX10-32-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
; GFX10-32-NEXT: BB7_9:
|
||||
; GFX10-32-NEXT: .LBB7_9:
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
|
@ -1098,27 +1098,27 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
|||
; GFX10-64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; GFX10-64-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX10-64-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||
; GFX10-64-NEXT: s_cbranch_execz BB7_3
|
||||
; GFX10-64-NEXT: s_cbranch_execz .LBB7_3
|
||||
; GFX10-64-NEXT: ; %bb.1: ; %.demote0
|
||||
; GFX10-64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 BB7_9
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; GFX10-64-NEXT: ; %bb.2: ; %.demote0
|
||||
; GFX10-64-NEXT: s_wqm_b64 s[6:7], s[0:1]
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[6:7]
|
||||
; GFX10-64-NEXT: BB7_3: ; %.continue0.preheader
|
||||
; GFX10-64-NEXT: .LBB7_3: ; %.continue0.preheader
|
||||
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX10-64-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX10-64-NEXT: s_mov_b64 s[2:3], 0
|
||||
; GFX10-64-NEXT: s_branch BB7_5
|
||||
; GFX10-64-NEXT: BB7_4: ; %.continue1
|
||||
; GFX10-64-NEXT: s_branch .LBB7_5
|
||||
; GFX10-64-NEXT: .LBB7_4: ; %.continue1
|
||||
; GFX10-64-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX10-64-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX10-64-NEXT: v_add_nc_u32_e32 v0, 1, v0
|
||||
; GFX10-64-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
|
||||
; GFX10-64-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX10-64-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX10-64-NEXT: s_cbranch_execz BB7_8
|
||||
; GFX10-64-NEXT: BB7_5: ; %.continue0
|
||||
; GFX10-64-NEXT: s_cbranch_execz .LBB7_8
|
||||
; GFX10-64-NEXT: .LBB7_5: ; %.continue0
|
||||
; GFX10-64-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-64-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX10-64-NEXT: v_cndmask_b32_e64 v2, v0, 0, s[4:5]
|
||||
|
@ -1131,24 +1131,24 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
|||
; GFX10-64-NEXT: s_xor_b64 s[4:5], s[4:5], -1
|
||||
; GFX10-64-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; GFX10-64-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
|
||||
; GFX10-64-NEXT: s_cbranch_execz BB7_4
|
||||
; GFX10-64-NEXT: s_cbranch_execz .LBB7_4
|
||||
; GFX10-64-NEXT: ; %bb.6: ; %.demote1
|
||||
; GFX10-64-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX10-64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 BB7_9
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; GFX10-64-NEXT: ; %bb.7: ; %.demote1
|
||||
; GFX10-64-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX10-64-NEXT: s_wqm_b64 s[6:7], s[0:1]
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[6:7]
|
||||
; GFX10-64-NEXT: s_branch BB7_4
|
||||
; GFX10-64-NEXT: BB7_8: ; %.return
|
||||
; GFX10-64-NEXT: s_branch .LBB7_4
|
||||
; GFX10-64-NEXT: .LBB7_8: ; %.return
|
||||
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[0:1]
|
||||
; GFX10-64-NEXT: v_mov_b32_e32 v0, 0x3c00
|
||||
; GFX10-64-NEXT: v_bfrev_b32_e32 v1, 60
|
||||
; GFX10-64-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
; GFX10-64-NEXT: BB7_9:
|
||||
; GFX10-64-NEXT: .LBB7_9:
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
|
|
|
@ -15,7 +15,7 @@ define amdgpu_cs void @memcpy_p1i8(i8 addrspace(1)* %dst, i8 addrspace(1)* %src)
|
|||
; LOOP-NEXT: v_mov_b32_e32 v7, v1
|
||||
; LOOP-NEXT: v_mov_b32_e32 v6, v0
|
||||
; LOOP-NEXT: v_mov_b32_e32 v8, s6
|
||||
; LOOP-NEXT: BB0_1: ; %load-store-loop
|
||||
; LOOP-NEXT: .LBB0_1: ; %load-store-loop
|
||||
; LOOP-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; LOOP-NEXT: buffer_load_ubyte v9, v[4:5], s[4:7], 0 addr64
|
||||
; LOOP-NEXT: buffer_load_ubyte v10, v[4:5], s[4:7], 0 addr64 offset:1
|
||||
|
@ -72,7 +72,7 @@ define amdgpu_cs void @memcpy_p1i8(i8 addrspace(1)* %dst, i8 addrspace(1)* %src)
|
|||
; LOOP-NEXT: v_addc_u32_e64 v7, s[0:1], 0, v7, s[0:1]
|
||||
; LOOP-NEXT: v_add_i32_e64 v4, s[0:1], 16, v4
|
||||
; LOOP-NEXT: v_addc_u32_e64 v5, s[0:1], 0, v5, s[0:1]
|
||||
; LOOP-NEXT: s_cbranch_vccnz BB0_1
|
||||
; LOOP-NEXT: s_cbranch_vccnz .LBB0_1
|
||||
; LOOP-NEXT: ; %bb.2: ; %memcpy-split
|
||||
; LOOP-NEXT: s_mov_b32 s2, 0
|
||||
; LOOP-NEXT: s_mov_b32 s3, 0xf000
|
||||
|
|
|
@ -10,14 +10,14 @@ define amdgpu_cs void @memmove_p1i8(i8 addrspace(1)* %dst, i8 addrspace(1)* %src
|
|||
; LOOP-NEXT: v_cmp_ge_u64_e32 vcc, v[2:3], v[0:1]
|
||||
; LOOP-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; LOOP-NEXT: s_xor_b64 s[4:5], exec, s[0:1]
|
||||
; LOOP-NEXT: s_cbranch_execz BB0_3
|
||||
; LOOP-NEXT: s_cbranch_execz .LBB0_3
|
||||
; LOOP-NEXT: ; %bb.1: ; %copy_forward
|
||||
; LOOP-NEXT: s_mov_b64 s[0:1], 0
|
||||
; LOOP-NEXT: s_mov_b32 s2, 0
|
||||
; LOOP-NEXT: s_mov_b32 s3, 0xf000
|
||||
; LOOP-NEXT: v_mov_b32_e32 v5, s1
|
||||
; LOOP-NEXT: v_mov_b32_e32 v4, s0
|
||||
; LOOP-NEXT: BB0_2: ; %copy_forward_loop
|
||||
; LOOP-NEXT: .LBB0_2: ; %copy_forward_loop
|
||||
; LOOP-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; LOOP-NEXT: v_add_i32_e32 v6, vcc, v2, v4
|
||||
; LOOP-NEXT: v_addc_u32_e32 v7, vcc, v3, v5, vcc
|
||||
|
@ -30,11 +30,11 @@ define amdgpu_cs void @memmove_p1i8(i8 addrspace(1)* %dst, i8 addrspace(1)* %src
|
|||
; LOOP-NEXT: v_cmp_ne_u32_e32 vcc, 4, v4
|
||||
; LOOP-NEXT: s_waitcnt vmcnt(0)
|
||||
; LOOP-NEXT: buffer_store_byte v8, v[6:7], s[0:3], 0 addr64
|
||||
; LOOP-NEXT: s_cbranch_vccnz BB0_2
|
||||
; LOOP-NEXT: BB0_3: ; %Flow14
|
||||
; LOOP-NEXT: s_cbranch_vccnz .LBB0_2
|
||||
; LOOP-NEXT: .LBB0_3: ; %Flow14
|
||||
; LOOP-NEXT: s_or_saveexec_b64 s[0:1], s[4:5]
|
||||
; LOOP-NEXT: s_xor_b64 exec, exec, s[0:1]
|
||||
; LOOP-NEXT: s_cbranch_execz BB0_6
|
||||
; LOOP-NEXT: s_cbranch_execz .LBB0_6
|
||||
; LOOP-NEXT: ; %bb.4: ; %copy_backwards
|
||||
; LOOP-NEXT: s_mov_b64 s[4:5], 3
|
||||
; LOOP-NEXT: s_mov_b32 s2, 0
|
||||
|
@ -42,7 +42,7 @@ define amdgpu_cs void @memmove_p1i8(i8 addrspace(1)* %dst, i8 addrspace(1)* %src
|
|||
; LOOP-NEXT: s_mov_b64 s[0:1], 0
|
||||
; LOOP-NEXT: v_mov_b32_e32 v4, s4
|
||||
; LOOP-NEXT: v_mov_b32_e32 v5, s5
|
||||
; LOOP-NEXT: BB0_5: ; %copy_backwards_loop
|
||||
; LOOP-NEXT: .LBB0_5: ; %copy_backwards_loop
|
||||
; LOOP-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; LOOP-NEXT: v_add_i32_e32 v6, vcc, v2, v4
|
||||
; LOOP-NEXT: v_addc_u32_e32 v7, vcc, v3, v5, vcc
|
||||
|
@ -55,8 +55,8 @@ define amdgpu_cs void @memmove_p1i8(i8 addrspace(1)* %dst, i8 addrspace(1)* %src
|
|||
; LOOP-NEXT: v_cmp_eq_u32_e32 vcc, -1, v4
|
||||
; LOOP-NEXT: s_waitcnt vmcnt(0)
|
||||
; LOOP-NEXT: buffer_store_byte v8, v[6:7], s[0:3], 0 addr64
|
||||
; LOOP-NEXT: s_cbranch_vccz BB0_5
|
||||
; LOOP-NEXT: BB0_6: ; %memmove_done
|
||||
; LOOP-NEXT: s_cbranch_vccz .LBB0_5
|
||||
; LOOP-NEXT: .LBB0_6: ; %memmove_done
|
||||
; LOOP-NEXT: s_endpgm
|
||||
;
|
||||
; UNROLL-LABEL: memmove_p1i8:
|
||||
|
|
|
@ -12,7 +12,7 @@ define amdgpu_cs void @memset_p1i8(i8 addrspace(1)* %dst, i8 %val) {
|
|||
; LOOP-NEXT: s_mov_b32 s3, 0xf000
|
||||
; LOOP-NEXT: v_mov_b32_e32 v4, s1
|
||||
; LOOP-NEXT: v_mov_b32_e32 v3, s0
|
||||
; LOOP-NEXT: BB0_1: ; %loadstoreloop
|
||||
; LOOP-NEXT: .LBB0_1: ; %loadstoreloop
|
||||
; LOOP-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; LOOP-NEXT: v_add_i32_e32 v5, vcc, v0, v3
|
||||
; LOOP-NEXT: v_addc_u32_e32 v6, vcc, v1, v4, vcc
|
||||
|
@ -20,7 +20,7 @@ define amdgpu_cs void @memset_p1i8(i8 addrspace(1)* %dst, i8 %val) {
|
|||
; LOOP-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc
|
||||
; LOOP-NEXT: v_cmp_gt_u32_e32 vcc, 4, v3
|
||||
; LOOP-NEXT: buffer_store_byte v2, v[5:6], s[0:3], 0 addr64
|
||||
; LOOP-NEXT: s_cbranch_vccnz BB0_1
|
||||
; LOOP-NEXT: s_cbranch_vccnz .LBB0_1
|
||||
; LOOP-NEXT: ; %bb.2: ; %split
|
||||
; LOOP-NEXT: s_endpgm
|
||||
;
|
||||
|
|
|
@ -13,7 +13,7 @@ define amdgpu_kernel void @localize_constants(i1 %cond) {
|
|||
; GFX9-NEXT: s_xor_b32 s1, s1, -1
|
||||
; GFX9-NEXT: s_and_b32 s1, s1, 1
|
||||
; GFX9-NEXT: s_cmp_lg_u32 s1, 0
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB0_2
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB0_2
|
||||
; GFX9-NEXT: ; %bb.1: ; %bb1
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0x5be6
|
||||
; GFX9-NEXT: global_store_dword v[0:1], v0, off
|
||||
|
@ -34,11 +34,11 @@ define amdgpu_kernel void @localize_constants(i1 %cond) {
|
|||
; GFX9-NEXT: s_mov_b32 s0, 0
|
||||
; GFX9-NEXT: global_store_dword v[0:1], v0, off
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: BB0_2: ; %Flow
|
||||
; GFX9-NEXT: .LBB0_2: ; %Flow
|
||||
; GFX9-NEXT: s_xor_b32 s0, s0, -1
|
||||
; GFX9-NEXT: s_and_b32 s0, s0, 1
|
||||
; GFX9-NEXT: s_cmp_lg_u32 s0, 0
|
||||
; GFX9-NEXT: s_cbranch_scc1 BB0_4
|
||||
; GFX9-NEXT: s_cbranch_scc1 .LBB0_4
|
||||
; GFX9-NEXT: ; %bb.3: ; %bb0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
|
||||
; GFX9-NEXT: global_store_dword v[0:1], v0, off
|
||||
|
@ -58,7 +58,7 @@ define amdgpu_kernel void @localize_constants(i1 %cond) {
|
|||
; GFX9-NEXT: v_mov_b32_e32 v0, 0x5be6
|
||||
; GFX9-NEXT: global_store_dword v[0:1], v0, off
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: BB0_4: ; %bb2
|
||||
; GFX9-NEXT: .LBB0_4: ; %bb2
|
||||
; GFX9-NEXT: s_endpgm
|
||||
entry:
|
||||
br i1 %cond, label %bb0, label %bb1
|
||||
|
@ -101,7 +101,7 @@ define amdgpu_kernel void @localize_globals(i1 %cond) {
|
|||
; GFX9-NEXT: s_xor_b32 s1, s1, -1
|
||||
; GFX9-NEXT: s_and_b32 s1, s1, 1
|
||||
; GFX9-NEXT: s_cmp_lg_u32 s1, 0
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB1_2
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB1_2
|
||||
; GFX9-NEXT: ; %bb.1: ; %bb1
|
||||
; GFX9-NEXT: s_getpc_b64 s[0:1]
|
||||
; GFX9-NEXT: s_add_u32 s0, s0, gv2@gotpcrel32@lo+4
|
||||
|
@ -119,11 +119,11 @@ define amdgpu_kernel void @localize_globals(i1 %cond) {
|
|||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: global_store_dword v0, v1, s[6:7]
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: BB1_2: ; %Flow
|
||||
; GFX9-NEXT: .LBB1_2: ; %Flow
|
||||
; GFX9-NEXT: s_xor_b32 s0, s0, -1
|
||||
; GFX9-NEXT: s_and_b32 s0, s0, 1
|
||||
; GFX9-NEXT: s_cmp_lg_u32 s0, 0
|
||||
; GFX9-NEXT: s_cbranch_scc1 BB1_4
|
||||
; GFX9-NEXT: s_cbranch_scc1 .LBB1_4
|
||||
; GFX9-NEXT: ; %bb.3: ; %bb0
|
||||
; GFX9-NEXT: s_getpc_b64 s[0:1]
|
||||
; GFX9-NEXT: s_add_u32 s0, s0, gv0@gotpcrel32@lo+4
|
||||
|
@ -140,7 +140,7 @@ define amdgpu_kernel void @localize_globals(i1 %cond) {
|
|||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: global_store_dword v0, v1, s[2:3]
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: BB1_4: ; %bb2
|
||||
; GFX9-NEXT: .LBB1_4: ; %bb2
|
||||
; GFX9-NEXT: s_endpgm
|
||||
entry:
|
||||
br i1 %cond, label %bb0, label %bb1
|
||||
|
@ -173,7 +173,7 @@ define void @localize_internal_globals(i1 %cond) {
|
|||
; GFX9-NEXT: s_xor_b64 s[4:5], vcc, -1
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
|
||||
; GFX9-NEXT: s_cbranch_execz BB2_2
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB2_2
|
||||
; GFX9-NEXT: ; %bb.1: ; %bb1
|
||||
; GFX9-NEXT: s_getpc_b64 s[6:7]
|
||||
; GFX9-NEXT: s_add_u32 s6, s6, static.gv2@rel32@lo+4
|
||||
|
@ -187,10 +187,10 @@ define void @localize_internal_globals(i1 %cond) {
|
|||
; GFX9-NEXT: v_mov_b32_e32 v1, 1
|
||||
; GFX9-NEXT: global_store_dword v0, v1, s[6:7]
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: BB2_2: ; %Flow
|
||||
; GFX9-NEXT: .LBB2_2: ; %Flow
|
||||
; GFX9-NEXT: s_or_saveexec_b64 s[4:5], s[4:5]
|
||||
; GFX9-NEXT: s_xor_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_execz BB2_4
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB2_4
|
||||
; GFX9-NEXT: ; %bb.3: ; %bb0
|
||||
; GFX9-NEXT: s_getpc_b64 s[6:7]
|
||||
; GFX9-NEXT: s_add_u32 s6, s6, static.gv0@rel32@lo+4
|
||||
|
@ -204,7 +204,7 @@ define void @localize_internal_globals(i1 %cond) {
|
|||
; GFX9-NEXT: v_mov_b32_e32 v1, 1
|
||||
; GFX9-NEXT: global_store_dword v0, v1, s[6:7]
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: BB2_4: ; %bb2
|
||||
; GFX9-NEXT: .LBB2_4: ; %bb2
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
|
|
|
@ -20,12 +20,12 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
|
|||
; GCN-NEXT: s_mov_b32 s33, 0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_cmp_lg_u32 s6, 0
|
||||
; GCN-NEXT: s_cbranch_scc1 BB0_3
|
||||
; GCN-NEXT: s_cbranch_scc1 .LBB0_3
|
||||
; GCN-NEXT: ; %bb.1: ; %bb.0
|
||||
; GCN-NEXT: s_load_dword s6, s[4:5], 0xc
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_cmp_lg_u32 s6, 0
|
||||
; GCN-NEXT: s_cbranch_scc1 BB0_3
|
||||
; GCN-NEXT: s_cbranch_scc1 .LBB0_3
|
||||
; GCN-NEXT: ; %bb.2: ; %bb.1
|
||||
; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
|
||||
; GCN-NEXT: s_load_dword s8, s[4:5], 0x10
|
||||
|
@ -43,7 +43,7 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
|
|||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_add_u32_e32 v0, v2, v0
|
||||
; GCN-NEXT: global_store_dword v1, v0, s[6:7]
|
||||
; GCN-NEXT: BB0_3: ; %bb.2
|
||||
; GCN-NEXT: .LBB0_3: ; %bb.2
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: global_store_dword v[0:1], v0, off
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -91,7 +91,7 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
|
|||
; GCN-NEXT: s_mov_b32 s33, 0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_cmp_lg_u32 s6, 0
|
||||
; GCN-NEXT: s_cbranch_scc1 BB1_2
|
||||
; GCN-NEXT: s_cbranch_scc1 .LBB1_2
|
||||
; GCN-NEXT: ; %bb.1: ; %bb.0
|
||||
; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
|
||||
; GCN-NEXT: s_load_dword s8, s[4:5], 0xc
|
||||
|
@ -110,7 +110,7 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
|
|||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_add_u32_e32 v0, v2, v0
|
||||
; GCN-NEXT: global_store_dword v1, v0, s[6:7]
|
||||
; GCN-NEXT: BB1_2: ; %bb.1
|
||||
; GCN-NEXT: .LBB1_2: ; %bb.1
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: global_store_dword v[0:1], v0, off
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -153,11 +153,11 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
|
|||
; GCN-NEXT: s_mov_b32 s33, s32
|
||||
; GCN-NEXT: s_addk_i32 s32, 0x400
|
||||
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GCN-NEXT: s_cbranch_execz BB2_3
|
||||
; GCN-NEXT: s_cbranch_execz .LBB2_3
|
||||
; GCN-NEXT: ; %bb.1: ; %bb.0
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
|
||||
; GCN-NEXT: s_and_b64 exec, exec, vcc
|
||||
; GCN-NEXT: s_cbranch_execz BB2_3
|
||||
; GCN-NEXT: s_cbranch_execz .LBB2_3
|
||||
; GCN-NEXT: ; %bb.2: ; %bb.1
|
||||
; GCN-NEXT: s_add_u32 s6, s32, 0x1000
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, 0
|
||||
|
@ -172,7 +172,7 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
|
|||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_add_u32_e32 v2, v2, v3
|
||||
; GCN-NEXT: global_store_dword v[0:1], v2, off
|
||||
; GCN-NEXT: BB2_3: ; %bb.2
|
||||
; GCN-NEXT: .LBB2_3: ; %bb.2
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: global_store_dword v[0:1], v0, off
|
||||
|
@ -218,7 +218,7 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
|
|||
; GCN-NEXT: s_and_b32 s33, s33, 0xfffff000
|
||||
; GCN-NEXT: s_addk_i32 s32, 0x2000
|
||||
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GCN-NEXT: s_cbranch_execz BB3_2
|
||||
; GCN-NEXT: s_cbranch_execz .LBB3_2
|
||||
; GCN-NEXT: ; %bb.1: ; %bb.0
|
||||
; GCN-NEXT: s_add_u32 s6, s32, 0x1000
|
||||
; GCN-NEXT: s_and_b32 s6, s6, 0xfffff000
|
||||
|
@ -234,7 +234,7 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
|
|||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_add_u32_e32 v2, v2, v3
|
||||
; GCN-NEXT: global_store_dword v[0:1], v2, off
|
||||
; GCN-NEXT: BB3_2: ; %bb.1
|
||||
; GCN-NEXT: .LBB3_2: ; %bb.1
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: global_store_dword v[0:1], v0, off
|
||||
|
|
|
@ -16,7 +16,7 @@ define i64 @v_sdiv_i64(i64 %num, i64 %den) {
|
|||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execz BB0_2
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB0_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v3
|
||||
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v0
|
||||
|
@ -160,10 +160,10 @@ define i64 @v_sdiv_i64(i64 %num, i64 %den) {
|
|||
; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr2
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr4
|
||||
; CHECK-NEXT: BB0_2: ; %Flow
|
||||
; CHECK-NEXT: .LBB0_2: ; %Flow
|
||||
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
|
||||
; CHECK-NEXT: s_xor_b64 exec, exec, s[6:7]
|
||||
; CHECK-NEXT: s_cbranch_execz BB0_4
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB0_4
|
||||
; CHECK-NEXT: ; %bb.3:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v2
|
||||
; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
|
||||
|
@ -185,7 +185,7 @@ define i64 @v_sdiv_i64(i64 %num, i64 %den) {
|
|||
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: BB0_4:
|
||||
; CHECK-NEXT: .LBB0_4:
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = sdiv i64 %num, %den
|
||||
|
@ -203,7 +203,7 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
|
|||
; CHECK-NEXT: s_mov_b32 s1, -1
|
||||
; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], s[0:1]
|
||||
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[6:7], 0
|
||||
; CHECK-NEXT: s_cbranch_vccz BB1_2
|
||||
; CHECK-NEXT: s_cbranch_vccz .LBB1_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: s_ashr_i32 s6, s3, 31
|
||||
; CHECK-NEXT: s_ashr_i32 s8, s5, 31
|
||||
|
@ -351,14 +351,14 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
|
|||
; CHECK-NEXT: v_xor_b32_e32 v0, s0, v0
|
||||
; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0
|
||||
; CHECK-NEXT: s_mov_b32 s1, 0
|
||||
; CHECK-NEXT: s_branch BB1_3
|
||||
; CHECK-NEXT: BB1_2:
|
||||
; CHECK-NEXT: s_branch .LBB1_3
|
||||
; CHECK-NEXT: .LBB1_2:
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: BB1_3: ; %Flow
|
||||
; CHECK-NEXT: .LBB1_3: ; %Flow
|
||||
; CHECK-NEXT: s_xor_b32 s0, s1, -1
|
||||
; CHECK-NEXT: s_and_b32 s0, s0, 1
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
|
||||
; CHECK-NEXT: s_cbranch_scc1 BB1_5
|
||||
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
|
||||
; CHECK-NEXT: ; %bb.4:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s4
|
||||
; CHECK-NEXT: s_sub_i32 s0, 0, s4
|
||||
|
@ -379,7 +379,7 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
|
|||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
|
||||
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||||
; CHECK-NEXT: BB1_5:
|
||||
; CHECK-NEXT: .LBB1_5:
|
||||
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; CHECK-NEXT: s_mov_b32 s1, s0
|
||||
; CHECK-NEXT: ; return to shader part epilog
|
||||
|
@ -694,7 +694,7 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_2
|
||||
; CGP-NEXT: s_cbranch_execz .LBB2_2
|
||||
; CGP-NEXT: ; %bb.1:
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v5
|
||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v4, v0
|
||||
|
@ -838,10 +838,10 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; CGP-NEXT: ; implicit-def: $vgpr4
|
||||
; CGP-NEXT: ; implicit-def: $vgpr10
|
||||
; CGP-NEXT: BB2_2: ; %Flow2
|
||||
; CGP-NEXT: .LBB2_2: ; %Flow2
|
||||
; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
|
||||
; CGP-NEXT: s_xor_b64 exec, exec, s[6:7]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_4
|
||||
; CGP-NEXT: s_cbranch_execz .LBB2_4
|
||||
; CGP-NEXT: ; %bb.3:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4
|
||||
; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4
|
||||
|
@ -863,7 +863,7 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||||
; CGP-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CGP-NEXT: BB2_4:
|
||||
; CGP-NEXT: .LBB2_4:
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; CGP-NEXT: v_or_b32_e32 v3, v9, v7
|
||||
; CGP-NEXT: v_mov_b32_e32 v2, 0
|
||||
|
@ -871,7 +871,7 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_6
|
||||
; CGP-NEXT: s_cbranch_execz .LBB2_6
|
||||
; CGP-NEXT: ; %bb.5:
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v7
|
||||
; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v2
|
||||
|
@ -1015,10 +1015,10 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v5, vcc
|
||||
; CGP-NEXT: ; implicit-def: $vgpr6
|
||||
; CGP-NEXT: ; implicit-def: $vgpr8
|
||||
; CGP-NEXT: BB2_6: ; %Flow
|
||||
; CGP-NEXT: .LBB2_6: ; %Flow
|
||||
; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
|
||||
; CGP-NEXT: s_xor_b64 exec, exec, s[6:7]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_8
|
||||
; CGP-NEXT: s_cbranch_execz .LBB2_8
|
||||
; CGP-NEXT: ; %bb.7:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v6
|
||||
; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v6
|
||||
|
@ -1040,7 +1040,7 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
|
||||
; CGP-NEXT: v_mov_b32_e32 v3, 0
|
||||
; CGP-NEXT: BB2_8:
|
||||
; CGP-NEXT: .LBB2_8:
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; CGP-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = sdiv <2 x i64> %num, %den
|
||||
|
@ -2487,7 +2487,7 @@ define i64 @v_sdiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
|
|||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execz BB7_2
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB7_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v6
|
||||
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v5, v0
|
||||
|
@ -2631,10 +2631,10 @@ define i64 @v_sdiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
|
|||
; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr5_vgpr6
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr3
|
||||
; CHECK-NEXT: BB7_2: ; %Flow
|
||||
; CHECK-NEXT: .LBB7_2: ; %Flow
|
||||
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
|
||||
; CHECK-NEXT: s_xor_b64 exec, exec, s[6:7]
|
||||
; CHECK-NEXT: s_cbranch_execz BB7_4
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB7_4
|
||||
; CHECK-NEXT: ; %bb.3:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v5
|
||||
; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v5
|
||||
|
@ -2656,7 +2656,7 @@ define i64 @v_sdiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
|
|||
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: BB7_4:
|
||||
; CHECK-NEXT: .LBB7_4:
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%shl.y = shl i64 4096, %y
|
||||
|
@ -2969,7 +2969,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_2
|
||||
; CGP-NEXT: s_cbranch_execz .LBB8_2
|
||||
; CGP-NEXT: ; %bb.1:
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v3
|
||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v0
|
||||
|
@ -3113,10 +3113,10 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; CGP-NEXT: ; implicit-def: $vgpr8
|
||||
; CGP-NEXT: BB8_2: ; %Flow2
|
||||
; CGP-NEXT: .LBB8_2: ; %Flow2
|
||||
; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
|
||||
; CGP-NEXT: s_xor_b64 exec, exec, s[6:7]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_4
|
||||
; CGP-NEXT: s_cbranch_execz .LBB8_4
|
||||
; CGP-NEXT: ; %bb.3:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v2
|
||||
; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
|
||||
|
@ -3138,7 +3138,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
|
||||
; CGP-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CGP-NEXT: BB8_4:
|
||||
; CGP-NEXT: .LBB8_4:
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; CGP-NEXT: v_or_b32_e32 v3, v7, v11
|
||||
; CGP-NEXT: v_mov_b32_e32 v2, 0
|
||||
|
@ -3146,7 +3146,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_6
|
||||
; CGP-NEXT: s_cbranch_execz .LBB8_6
|
||||
; CGP-NEXT: ; %bb.5:
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v11
|
||||
; CGP-NEXT: v_add_i32_e32 v3, vcc, v10, v2
|
||||
|
@ -3290,10 +3290,10 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v5, vcc
|
||||
; CGP-NEXT: ; implicit-def: $vgpr10_vgpr11
|
||||
; CGP-NEXT: ; implicit-def: $vgpr5
|
||||
; CGP-NEXT: BB8_6: ; %Flow
|
||||
; CGP-NEXT: .LBB8_6: ; %Flow
|
||||
; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
|
||||
; CGP-NEXT: s_xor_b64 exec, exec, s[6:7]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_8
|
||||
; CGP-NEXT: s_cbranch_execz .LBB8_8
|
||||
; CGP-NEXT: ; %bb.7:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v10
|
||||
; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v10
|
||||
|
@ -3315,7 +3315,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v10
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
|
||||
; CGP-NEXT: v_mov_b32_e32 v3, 0
|
||||
; CGP-NEXT: BB8_8:
|
||||
; CGP-NEXT: .LBB8_8:
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; CGP-NEXT: s_setpc_b64 s[30:31]
|
||||
%shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
|
||||
|
|
|
@ -16,7 +16,7 @@ define i64 @v_srem_i64(i64 %num, i64 %den) {
|
|||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execz BB0_2
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB0_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v3
|
||||
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v0
|
||||
|
@ -158,10 +158,10 @@ define i64 @v_srem_i64(i64 %num, i64 %den) {
|
|||
; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v2, v6, vcc
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr2
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr4
|
||||
; CHECK-NEXT: BB0_2: ; %Flow
|
||||
; CHECK-NEXT: .LBB0_2: ; %Flow
|
||||
; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
|
||||
; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execz BB0_4
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB0_4
|
||||
; CHECK-NEXT: ; %bb.3:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v2
|
||||
; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
|
||||
|
@ -181,7 +181,7 @@ define i64 @v_srem_i64(i64 %num, i64 %den) {
|
|||
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: BB0_4:
|
||||
; CHECK-NEXT: .LBB0_4:
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = srem i64 %num, %den
|
||||
|
@ -199,7 +199,7 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
|
|||
; CHECK-NEXT: s_mov_b32 s1, -1
|
||||
; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], s[0:1]
|
||||
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[6:7], 0
|
||||
; CHECK-NEXT: s_cbranch_vccz BB1_2
|
||||
; CHECK-NEXT: s_cbranch_vccz .LBB1_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: s_ashr_i32 s6, s3, 31
|
||||
; CHECK-NEXT: s_ashr_i32 s0, s5, 31
|
||||
|
@ -345,14 +345,14 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
|
|||
; CHECK-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||
; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0
|
||||
; CHECK-NEXT: s_mov_b32 s1, 0
|
||||
; CHECK-NEXT: s_branch BB1_3
|
||||
; CHECK-NEXT: BB1_2:
|
||||
; CHECK-NEXT: s_branch .LBB1_3
|
||||
; CHECK-NEXT: .LBB1_2:
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: BB1_3: ; %Flow
|
||||
; CHECK-NEXT: .LBB1_3: ; %Flow
|
||||
; CHECK-NEXT: s_xor_b32 s0, s1, -1
|
||||
; CHECK-NEXT: s_and_b32 s0, s0, 1
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
|
||||
; CHECK-NEXT: s_cbranch_scc1 BB1_5
|
||||
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
|
||||
; CHECK-NEXT: ; %bb.4:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s4
|
||||
; CHECK-NEXT: s_sub_i32 s0, 0, s4
|
||||
|
@ -371,7 +371,7 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
|
|||
; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0
|
||||
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; CHECK-NEXT: BB1_5:
|
||||
; CHECK-NEXT: .LBB1_5:
|
||||
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; CHECK-NEXT: s_mov_b32 s1, s0
|
||||
; CHECK-NEXT: ; return to shader part epilog
|
||||
|
@ -682,7 +682,7 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_2
|
||||
; CGP-NEXT: s_cbranch_execz .LBB2_2
|
||||
; CGP-NEXT: ; %bb.1:
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v5
|
||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v4, v0
|
||||
|
@ -824,10 +824,10 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: v_subb_u32_e32 v1, vcc, v2, v4, vcc
|
||||
; CGP-NEXT: ; implicit-def: $vgpr4
|
||||
; CGP-NEXT: ; implicit-def: $vgpr10
|
||||
; CGP-NEXT: BB2_2: ; %Flow2
|
||||
; CGP-NEXT: .LBB2_2: ; %Flow2
|
||||
; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
|
||||
; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_4
|
||||
; CGP-NEXT: s_cbranch_execz .LBB2_4
|
||||
; CGP-NEXT: ; %bb.3:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4
|
||||
; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4
|
||||
|
@ -847,7 +847,7 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; CGP-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CGP-NEXT: BB2_4:
|
||||
; CGP-NEXT: .LBB2_4:
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CGP-NEXT: v_or_b32_e32 v3, v9, v7
|
||||
; CGP-NEXT: v_mov_b32_e32 v2, 0
|
||||
|
@ -855,7 +855,7 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_6
|
||||
; CGP-NEXT: s_cbranch_execz .LBB2_6
|
||||
; CGP-NEXT: ; %bb.5:
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v7
|
||||
; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v2
|
||||
|
@ -997,10 +997,10 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: v_subb_u32_e32 v3, vcc, v4, v6, vcc
|
||||
; CGP-NEXT: ; implicit-def: $vgpr6
|
||||
; CGP-NEXT: ; implicit-def: $vgpr8
|
||||
; CGP-NEXT: BB2_6: ; %Flow
|
||||
; CGP-NEXT: .LBB2_6: ; %Flow
|
||||
; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
|
||||
; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_8
|
||||
; CGP-NEXT: s_cbranch_execz .LBB2_8
|
||||
; CGP-NEXT: ; %bb.7:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v6
|
||||
; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v6
|
||||
|
@ -1020,7 +1020,7 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
||||
; CGP-NEXT: v_mov_b32_e32 v3, 0
|
||||
; CGP-NEXT: BB2_8:
|
||||
; CGP-NEXT: .LBB2_8:
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CGP-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = srem <2 x i64> %num, %den
|
||||
|
@ -2451,7 +2451,7 @@ define i64 @v_srem_i64_pow2_shl_denom(i64 %x, i64 %y) {
|
|||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execz BB7_2
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB7_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v6
|
||||
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v5, v0
|
||||
|
@ -2593,10 +2593,10 @@ define i64 @v_srem_i64_pow2_shl_denom(i64 %x, i64 %y) {
|
|||
; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v2, v6, vcc
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr5_vgpr6
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr3
|
||||
; CHECK-NEXT: BB7_2: ; %Flow
|
||||
; CHECK-NEXT: .LBB7_2: ; %Flow
|
||||
; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
|
||||
; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execz BB7_4
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB7_4
|
||||
; CHECK-NEXT: ; %bb.3:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v5
|
||||
; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v5
|
||||
|
@ -2616,7 +2616,7 @@ define i64 @v_srem_i64_pow2_shl_denom(i64 %x, i64 %y) {
|
|||
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: BB7_4:
|
||||
; CHECK-NEXT: .LBB7_4:
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%shl.y = shl i64 4096, %y
|
||||
|
@ -2925,7 +2925,7 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_2
|
||||
; CGP-NEXT: s_cbranch_execz .LBB8_2
|
||||
; CGP-NEXT: ; %bb.1:
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v3
|
||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v0
|
||||
|
@ -3067,10 +3067,10 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: v_subb_u32_e32 v1, vcc, v2, v4, vcc
|
||||
; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; CGP-NEXT: ; implicit-def: $vgpr8
|
||||
; CGP-NEXT: BB8_2: ; %Flow2
|
||||
; CGP-NEXT: .LBB8_2: ; %Flow2
|
||||
; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
|
||||
; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_4
|
||||
; CGP-NEXT: s_cbranch_execz .LBB8_4
|
||||
; CGP-NEXT: ; %bb.3:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v2
|
||||
; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
|
||||
|
@ -3090,7 +3090,7 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; CGP-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CGP-NEXT: BB8_4:
|
||||
; CGP-NEXT: .LBB8_4:
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CGP-NEXT: v_or_b32_e32 v3, v7, v11
|
||||
; CGP-NEXT: v_mov_b32_e32 v2, 0
|
||||
|
@ -3098,7 +3098,7 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_6
|
||||
; CGP-NEXT: s_cbranch_execz .LBB8_6
|
||||
; CGP-NEXT: ; %bb.5:
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v11
|
||||
; CGP-NEXT: v_add_i32_e32 v3, vcc, v10, v2
|
||||
|
@ -3240,10 +3240,10 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: v_subb_u32_e32 v3, vcc, v4, v8, vcc
|
||||
; CGP-NEXT: ; implicit-def: $vgpr10_vgpr11
|
||||
; CGP-NEXT: ; implicit-def: $vgpr5
|
||||
; CGP-NEXT: BB8_6: ; %Flow
|
||||
; CGP-NEXT: .LBB8_6: ; %Flow
|
||||
; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
|
||||
; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_8
|
||||
; CGP-NEXT: s_cbranch_execz .LBB8_8
|
||||
; CGP-NEXT: ; %bb.7:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v10
|
||||
; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v10
|
||||
|
@ -3263,7 +3263,7 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v10
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
||||
; CGP-NEXT: v_mov_b32_e32 v3, 0
|
||||
; CGP-NEXT: BB8_8:
|
||||
; CGP-NEXT: .LBB8_8:
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CGP-NEXT: s_setpc_b64 s[30:31]
|
||||
%shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
|
||||
|
|
|
@ -16,7 +16,7 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) {
|
|||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execz BB0_2
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB0_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v2
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v3
|
||||
|
@ -145,10 +145,10 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) {
|
|||
; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr2
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr4
|
||||
; CHECK-NEXT: BB0_2: ; %Flow
|
||||
; CHECK-NEXT: .LBB0_2: ; %Flow
|
||||
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
|
||||
; CHECK-NEXT: s_xor_b64 exec, exec, s[6:7]
|
||||
; CHECK-NEXT: s_cbranch_execz BB0_4
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB0_4
|
||||
; CHECK-NEXT: ; %bb.3:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v2
|
||||
; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
|
||||
|
@ -170,7 +170,7 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) {
|
|||
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: BB0_4:
|
||||
; CHECK-NEXT: .LBB0_4:
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = udiv i64 %num, %den
|
||||
|
@ -188,7 +188,7 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
|
|||
; CHECK-NEXT: s_mov_b32 s5, -1
|
||||
; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], s[4:5]
|
||||
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[6:7], 0
|
||||
; CHECK-NEXT: s_cbranch_vccz BB1_2
|
||||
; CHECK-NEXT: s_cbranch_vccz .LBB1_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, s3
|
||||
|
@ -317,14 +317,14 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
|
|||
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; CHECK-NEXT: s_mov_b32 s5, 0
|
||||
; CHECK-NEXT: s_branch BB1_3
|
||||
; CHECK-NEXT: BB1_2:
|
||||
; CHECK-NEXT: s_branch .LBB1_3
|
||||
; CHECK-NEXT: .LBB1_2:
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: BB1_3: ; %Flow
|
||||
; CHECK-NEXT: .LBB1_3: ; %Flow
|
||||
; CHECK-NEXT: s_xor_b32 s1, s5, -1
|
||||
; CHECK-NEXT: s_and_b32 s1, s1, 1
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s1, 0
|
||||
; CHECK-NEXT: s_cbranch_scc1 BB1_5
|
||||
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
|
||||
; CHECK-NEXT: ; %bb.4:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2
|
||||
; CHECK-NEXT: s_sub_i32 s1, 0, s2
|
||||
|
@ -345,7 +345,7 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
|
|||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
|
||||
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v1
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||||
; CHECK-NEXT: BB1_5:
|
||||
; CHECK-NEXT: .LBB1_5:
|
||||
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; CHECK-NEXT: s_mov_b32 s1, s0
|
||||
; CHECK-NEXT: ; return to shader part epilog
|
||||
|
@ -630,7 +630,7 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_2
|
||||
; CGP-NEXT: s_cbranch_execz .LBB2_2
|
||||
; CGP-NEXT: ; %bb.1:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v1, v5
|
||||
|
@ -759,10 +759,10 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
|
||||
; CGP-NEXT: ; implicit-def: $vgpr4
|
||||
; CGP-NEXT: ; implicit-def: $vgpr10
|
||||
; CGP-NEXT: BB2_2: ; %Flow2
|
||||
; CGP-NEXT: .LBB2_2: ; %Flow2
|
||||
; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
|
||||
; CGP-NEXT: s_xor_b64 exec, exec, s[6:7]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_4
|
||||
; CGP-NEXT: s_cbranch_execz .LBB2_4
|
||||
; CGP-NEXT: ; %bb.3:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4
|
||||
; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4
|
||||
|
@ -784,7 +784,7 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||||
; CGP-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CGP-NEXT: BB2_4:
|
||||
; CGP-NEXT: .LBB2_4:
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; CGP-NEXT: v_or_b32_e32 v3, v9, v7
|
||||
; CGP-NEXT: v_mov_b32_e32 v2, 0
|
||||
|
@ -792,7 +792,7 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_6
|
||||
; CGP-NEXT: s_cbranch_execz .LBB2_6
|
||||
; CGP-NEXT: ; %bb.5:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v6
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v3, v7
|
||||
|
@ -921,10 +921,10 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
|
||||
; CGP-NEXT: ; implicit-def: $vgpr6
|
||||
; CGP-NEXT: ; implicit-def: $vgpr8
|
||||
; CGP-NEXT: BB2_6: ; %Flow
|
||||
; CGP-NEXT: .LBB2_6: ; %Flow
|
||||
; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
|
||||
; CGP-NEXT: s_xor_b64 exec, exec, s[6:7]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_8
|
||||
; CGP-NEXT: s_cbranch_execz .LBB2_8
|
||||
; CGP-NEXT: ; %bb.7:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v6
|
||||
; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v6
|
||||
|
@ -946,7 +946,7 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
|
||||
; CGP-NEXT: v_mov_b32_e32 v3, 0
|
||||
; CGP-NEXT: BB2_8:
|
||||
; CGP-NEXT: .LBB2_8:
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; CGP-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = udiv <2 x i64> %num, %den
|
||||
|
@ -1076,7 +1076,7 @@ define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
|
|||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execz BB7_2
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB7_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v5
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v6
|
||||
|
@ -1205,10 +1205,10 @@ define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
|
|||
; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr5_vgpr6
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr3
|
||||
; CHECK-NEXT: BB7_2: ; %Flow
|
||||
; CHECK-NEXT: .LBB7_2: ; %Flow
|
||||
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
|
||||
; CHECK-NEXT: s_xor_b64 exec, exec, s[6:7]
|
||||
; CHECK-NEXT: s_cbranch_execz BB7_4
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB7_4
|
||||
; CHECK-NEXT: ; %bb.3:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v5
|
||||
; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v5
|
||||
|
@ -1230,7 +1230,7 @@ define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
|
|||
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: BB7_4:
|
||||
; CHECK-NEXT: .LBB7_4:
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%shl.y = shl i64 4096, %y
|
||||
|
@ -1513,7 +1513,7 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_2
|
||||
; CGP-NEXT: s_cbranch_execz .LBB8_2
|
||||
; CGP-NEXT: ; %bb.1:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v2
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v1, v3
|
||||
|
@ -1642,10 +1642,10 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
|
||||
; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; CGP-NEXT: ; implicit-def: $vgpr8
|
||||
; CGP-NEXT: BB8_2: ; %Flow2
|
||||
; CGP-NEXT: .LBB8_2: ; %Flow2
|
||||
; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
|
||||
; CGP-NEXT: s_xor_b64 exec, exec, s[6:7]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_4
|
||||
; CGP-NEXT: s_cbranch_execz .LBB8_4
|
||||
; CGP-NEXT: ; %bb.3:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v2
|
||||
; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
|
||||
|
@ -1667,7 +1667,7 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
|
||||
; CGP-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CGP-NEXT: BB8_4:
|
||||
; CGP-NEXT: .LBB8_4:
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; CGP-NEXT: v_or_b32_e32 v3, v7, v11
|
||||
; CGP-NEXT: v_mov_b32_e32 v2, 0
|
||||
|
@ -1675,7 +1675,7 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_6
|
||||
; CGP-NEXT: s_cbranch_execz .LBB8_6
|
||||
; CGP-NEXT: ; %bb.5:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v10
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v3, v11
|
||||
|
@ -1804,10 +1804,10 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
|
||||
; CGP-NEXT: ; implicit-def: $vgpr10_vgpr11
|
||||
; CGP-NEXT: ; implicit-def: $vgpr5
|
||||
; CGP-NEXT: BB8_6: ; %Flow
|
||||
; CGP-NEXT: .LBB8_6: ; %Flow
|
||||
; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
|
||||
; CGP-NEXT: s_xor_b64 exec, exec, s[6:7]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_8
|
||||
; CGP-NEXT: s_cbranch_execz .LBB8_8
|
||||
; CGP-NEXT: ; %bb.7:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v10
|
||||
; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v10
|
||||
|
@ -1829,7 +1829,7 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v10
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
|
||||
; CGP-NEXT: v_mov_b32_e32 v3, 0
|
||||
; CGP-NEXT: BB8_8:
|
||||
; CGP-NEXT: .LBB8_8:
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; CGP-NEXT: s_setpc_b64 s[30:31]
|
||||
%shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
|
||||
|
|
|
@ -16,7 +16,7 @@ define i64 @v_urem_i64(i64 %num, i64 %den) {
|
|||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execz BB0_2
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB0_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v2
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v3
|
||||
|
@ -144,10 +144,10 @@ define i64 @v_urem_i64(i64 %num, i64 %den) {
|
|||
; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr2
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr4
|
||||
; CHECK-NEXT: BB0_2: ; %Flow
|
||||
; CHECK-NEXT: .LBB0_2: ; %Flow
|
||||
; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
|
||||
; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execz BB0_4
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB0_4
|
||||
; CHECK-NEXT: ; %bb.3:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v2
|
||||
; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
|
||||
|
@ -167,7 +167,7 @@ define i64 @v_urem_i64(i64 %num, i64 %den) {
|
|||
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: BB0_4:
|
||||
; CHECK-NEXT: .LBB0_4:
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = urem i64 %num, %den
|
||||
|
@ -185,7 +185,7 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
|
|||
; CHECK-NEXT: s_mov_b32 s5, -1
|
||||
; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], s[4:5]
|
||||
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[6:7], 0
|
||||
; CHECK-NEXT: s_cbranch_vccz BB1_2
|
||||
; CHECK-NEXT: s_cbranch_vccz .LBB1_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, s3
|
||||
|
@ -313,14 +313,14 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
|
|||
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
||||
; CHECK-NEXT: s_mov_b32 s5, 0
|
||||
; CHECK-NEXT: s_branch BB1_3
|
||||
; CHECK-NEXT: BB1_2:
|
||||
; CHECK-NEXT: s_branch .LBB1_3
|
||||
; CHECK-NEXT: .LBB1_2:
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: BB1_3: ; %Flow
|
||||
; CHECK-NEXT: .LBB1_3: ; %Flow
|
||||
; CHECK-NEXT: s_xor_b32 s1, s5, -1
|
||||
; CHECK-NEXT: s_and_b32 s1, s1, 1
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s1, 0
|
||||
; CHECK-NEXT: s_cbranch_scc1 BB1_5
|
||||
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
|
||||
; CHECK-NEXT: ; %bb.4:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2
|
||||
; CHECK-NEXT: s_sub_i32 s1, 0, s2
|
||||
|
@ -339,7 +339,7 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
|
|||
; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s2, v0
|
||||
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v0
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; CHECK-NEXT: BB1_5:
|
||||
; CHECK-NEXT: .LBB1_5:
|
||||
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; CHECK-NEXT: s_mov_b32 s1, s0
|
||||
; CHECK-NEXT: ; return to shader part epilog
|
||||
|
@ -622,7 +622,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_2
|
||||
; CGP-NEXT: s_cbranch_execz .LBB2_2
|
||||
; CGP-NEXT: ; %bb.1:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v1, v5
|
||||
|
@ -750,10 +750,10 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc
|
||||
; CGP-NEXT: ; implicit-def: $vgpr4
|
||||
; CGP-NEXT: ; implicit-def: $vgpr10
|
||||
; CGP-NEXT: BB2_2: ; %Flow2
|
||||
; CGP-NEXT: .LBB2_2: ; %Flow2
|
||||
; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
|
||||
; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_4
|
||||
; CGP-NEXT: s_cbranch_execz .LBB2_4
|
||||
; CGP-NEXT: ; %bb.3:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4
|
||||
; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4
|
||||
|
@ -773,7 +773,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; CGP-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CGP-NEXT: BB2_4:
|
||||
; CGP-NEXT: .LBB2_4:
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CGP-NEXT: v_or_b32_e32 v3, v9, v7
|
||||
; CGP-NEXT: v_mov_b32_e32 v2, 0
|
||||
|
@ -781,7 +781,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_6
|
||||
; CGP-NEXT: s_cbranch_execz .LBB2_6
|
||||
; CGP-NEXT: ; %bb.5:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v6
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v3, v7
|
||||
|
@ -909,10 +909,10 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: v_cndmask_b32_e32 v3, v4, v7, vcc
|
||||
; CGP-NEXT: ; implicit-def: $vgpr6
|
||||
; CGP-NEXT: ; implicit-def: $vgpr8
|
||||
; CGP-NEXT: BB2_6: ; %Flow
|
||||
; CGP-NEXT: .LBB2_6: ; %Flow
|
||||
; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
|
||||
; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_8
|
||||
; CGP-NEXT: s_cbranch_execz .LBB2_8
|
||||
; CGP-NEXT: ; %bb.7:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v6
|
||||
; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v6
|
||||
|
@ -932,7 +932,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
|||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
||||
; CGP-NEXT: v_mov_b32_e32 v3, 0
|
||||
; CGP-NEXT: BB2_8:
|
||||
; CGP-NEXT: .LBB2_8:
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CGP-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = urem <2 x i64> %num, %den
|
||||
|
@ -1619,7 +1619,7 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) {
|
|||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execz BB7_2
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB7_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v5
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v6
|
||||
|
@ -1747,10 +1747,10 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) {
|
|||
; CHECK-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr5_vgpr6
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr3
|
||||
; CHECK-NEXT: BB7_2: ; %Flow
|
||||
; CHECK-NEXT: .LBB7_2: ; %Flow
|
||||
; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
|
||||
; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execz BB7_4
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB7_4
|
||||
; CHECK-NEXT: ; %bb.3:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v5
|
||||
; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v5
|
||||
|
@ -1770,7 +1770,7 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) {
|
|||
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: BB7_4:
|
||||
; CHECK-NEXT: .LBB7_4:
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%shl.y = shl i64 4096, %y
|
||||
|
@ -2051,7 +2051,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_2
|
||||
; CGP-NEXT: s_cbranch_execz .LBB8_2
|
||||
; CGP-NEXT: ; %bb.1:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v2
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v1, v3
|
||||
|
@ -2179,10 +2179,10 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
|
||||
; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; CGP-NEXT: ; implicit-def: $vgpr8
|
||||
; CGP-NEXT: BB8_2: ; %Flow2
|
||||
; CGP-NEXT: .LBB8_2: ; %Flow2
|
||||
; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
|
||||
; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_4
|
||||
; CGP-NEXT: s_cbranch_execz .LBB8_4
|
||||
; CGP-NEXT: ; %bb.3:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v2
|
||||
; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
|
||||
|
@ -2202,7 +2202,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; CGP-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CGP-NEXT: BB8_4:
|
||||
; CGP-NEXT: .LBB8_4:
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CGP-NEXT: v_or_b32_e32 v3, v7, v11
|
||||
; CGP-NEXT: v_mov_b32_e32 v2, 0
|
||||
|
@ -2210,7 +2210,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_6
|
||||
; CGP-NEXT: s_cbranch_execz .LBB8_6
|
||||
; CGP-NEXT: ; %bb.5:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v10
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v3, v11
|
||||
|
@ -2338,10 +2338,10 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: v_cndmask_b32_e32 v3, v4, v7, vcc
|
||||
; CGP-NEXT: ; implicit-def: $vgpr10_vgpr11
|
||||
; CGP-NEXT: ; implicit-def: $vgpr5
|
||||
; CGP-NEXT: BB8_6: ; %Flow
|
||||
; CGP-NEXT: .LBB8_6: ; %Flow
|
||||
; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
|
||||
; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_8
|
||||
; CGP-NEXT: s_cbranch_execz .LBB8_8
|
||||
; CGP-NEXT: ; %bb.7:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v10
|
||||
; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v10
|
||||
|
@ -2361,7 +2361,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
|||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v10
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
||||
; CGP-NEXT: v_mov_b32_e32 v3, 0
|
||||
; CGP-NEXT: BB8_8:
|
||||
; CGP-NEXT: .LBB8_8:
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CGP-NEXT: s_setpc_b64 s[30:31]
|
||||
%shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
|
||||
|
|
|
@ -35,7 +35,7 @@ define amdgpu_kernel void @test2(i32* %p, i32 %x) {
|
|||
; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_cmp_lt_i32 s2, 1
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB2_2
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB2_2
|
||||
; GFX9-NEXT: ; %bb.1: ; %else
|
||||
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, s2
|
||||
|
@ -44,7 +44,7 @@ define amdgpu_kernel void @test2(i32* %p, i32 %x) {
|
|||
; GFX9-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX9-NEXT: flat_store_dword v[0:1], v2
|
||||
; GFX9-NEXT: s_endpgm
|
||||
; GFX9-NEXT: BB2_2: ; %then
|
||||
; GFX9-NEXT: .LBB2_2: ; %then
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
; GFX10-LABEL: test2:
|
||||
|
@ -52,7 +52,7 @@ define amdgpu_kernel void @test2(i32* %p, i32 %x) {
|
|||
; GFX10-NEXT: s_load_dword s2, s[0:1], 0x2c
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: s_cmp_lt_i32 s2, 1
|
||||
; GFX10-NEXT: s_cbranch_scc0 BB2_2
|
||||
; GFX10-NEXT: s_cbranch_scc0 .LBB2_2
|
||||
; GFX10-NEXT: ; %bb.1: ; %else
|
||||
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, s2
|
||||
|
@ -61,7 +61,7 @@ define amdgpu_kernel void @test2(i32* %p, i32 %x) {
|
|||
; GFX10-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX10-NEXT: flat_store_dword v[0:1], v2
|
||||
; GFX10-NEXT: s_endpgm
|
||||
; GFX10-NEXT: BB2_2: ; %then
|
||||
; GFX10-NEXT: .LBB2_2: ; %then
|
||||
; GFX10-NEXT: s_endpgm
|
||||
%cond = icmp sgt i32 %x, 0
|
||||
br i1 %cond, label %then, label %else
|
||||
|
|
|
@ -19,7 +19,7 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out, i32 addrspac
|
|||
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX7LESS-NEXT: ; implicit-def: $vgpr1
|
||||
; GFX7LESS-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX7LESS-NEXT: s_cbranch_execz BB0_2
|
||||
; GFX7LESS-NEXT: s_cbranch_execz .LBB0_2
|
||||
; GFX7LESS-NEXT: ; %bb.1:
|
||||
; GFX7LESS-NEXT: s_mov_b32 s11, 0xf000
|
||||
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
|
||||
|
@ -33,7 +33,7 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out, i32 addrspac
|
|||
; GFX7LESS-NEXT: buffer_atomic_add v1, off, s[8:11], 0 glc
|
||||
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7LESS-NEXT: buffer_wbinvl1
|
||||
; GFX7LESS-NEXT: BB0_2:
|
||||
; GFX7LESS-NEXT: .LBB0_2:
|
||||
; GFX7LESS-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000
|
||||
|
@ -52,7 +52,7 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out, i32 addrspac
|
|||
; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX89-NEXT: ; implicit-def: $vgpr1
|
||||
; GFX89-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX89-NEXT: s_cbranch_execz BB0_2
|
||||
; GFX89-NEXT: s_cbranch_execz .LBB0_2
|
||||
; GFX89-NEXT: ; %bb.1:
|
||||
; GFX89-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX89-NEXT: s_mov_b32 s8, s2
|
||||
|
@ -66,7 +66,7 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out, i32 addrspac
|
|||
; GFX89-NEXT: buffer_atomic_add v1, off, s[8:11], 0 glc
|
||||
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX89-NEXT: buffer_wbinvl1_vol
|
||||
; GFX89-NEXT: BB0_2:
|
||||
; GFX89-NEXT: .LBB0_2:
|
||||
; GFX89-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX89-NEXT: v_readfirstlane_b32 s4, v1
|
||||
; GFX89-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -85,7 +85,7 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out, i32 addrspac
|
|||
; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s7, v0
|
||||
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX1064-NEXT: s_cbranch_execz BB0_2
|
||||
; GFX1064-NEXT: s_cbranch_execz .LBB0_2
|
||||
; GFX1064-NEXT: ; %bb.1:
|
||||
; GFX1064-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
|
||||
; GFX1064-NEXT: s_mov_b32 s11, 0x31016000
|
||||
|
@ -101,7 +101,7 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out, i32 addrspac
|
|||
; GFX1064-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: buffer_gl1_inv
|
||||
; GFX1064-NEXT: BB0_2:
|
||||
; GFX1064-NEXT: .LBB0_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -120,7 +120,7 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out, i32 addrspac
|
|||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s5, 0
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz BB0_2
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB0_2
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
; GFX1032-NEXT: s_bcnt1_i32_b32 s5, s5
|
||||
; GFX1032-NEXT: s_mov_b32 s11, 0x31016000
|
||||
|
@ -136,7 +136,7 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out, i32 addrspac
|
|||
; GFX1032-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: buffer_gl1_inv
|
||||
; GFX1032-NEXT: BB0_2:
|
||||
; GFX1032-NEXT: .LBB0_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -163,7 +163,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX7LESS-NEXT: ; implicit-def: $vgpr1
|
||||
; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX7LESS-NEXT: s_cbranch_execz BB1_2
|
||||
; GFX7LESS-NEXT: s_cbranch_execz .LBB1_2
|
||||
; GFX7LESS-NEXT: ; %bb.1:
|
||||
; GFX7LESS-NEXT: s_mov_b32 s15, 0xf000
|
||||
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
|
||||
|
@ -177,7 +177,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX7LESS-NEXT: buffer_atomic_add v1, off, s[12:15], 0 glc
|
||||
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7LESS-NEXT: buffer_wbinvl1
|
||||
; GFX7LESS-NEXT: BB1_2:
|
||||
; GFX7LESS-NEXT: .LBB1_2:
|
||||
; GFX7LESS-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7LESS-NEXT: s_mov_b32 s7, 0xf000
|
||||
|
@ -198,7 +198,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX8-NEXT: ; implicit-def: $vgpr1
|
||||
; GFX8-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX8-NEXT: s_cbranch_execz BB1_2
|
||||
; GFX8-NEXT: s_cbranch_execz .LBB1_2
|
||||
; GFX8-NEXT: ; %bb.1:
|
||||
; GFX8-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -212,7 +212,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX8-NEXT: buffer_atomic_add v1, off, s[12:15], 0 glc
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol
|
||||
; GFX8-NEXT: BB1_2:
|
||||
; GFX8-NEXT: .LBB1_2:
|
||||
; GFX8-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: v_mul_lo_u32 v0, s8, v0
|
||||
|
@ -233,7 +233,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr1
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX9-NEXT: s_cbranch_execz BB1_2
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB1_2
|
||||
; GFX9-NEXT: ; %bb.1:
|
||||
; GFX9-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -247,7 +247,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX9-NEXT: buffer_atomic_add v1, off, s[12:15], 0 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: BB1_2:
|
||||
; GFX9-NEXT: .LBB1_2:
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: v_mul_lo_u32 v0, s8, v0
|
||||
|
@ -269,7 +269,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s3, v0
|
||||
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX1064-NEXT: s_cbranch_execz BB1_2
|
||||
; GFX1064-NEXT: s_cbranch_execz .LBB1_2
|
||||
; GFX1064-NEXT: ; %bb.1:
|
||||
; GFX1064-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
|
||||
; GFX1064-NEXT: s_mov_b32 s15, 0x31016000
|
||||
|
@ -285,7 +285,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX1064-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: buffer_gl1_inv
|
||||
; GFX1064-NEXT: BB1_2:
|
||||
; GFX1064-NEXT: .LBB1_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1064-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -307,7 +307,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s3, 0
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz BB1_2
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB1_2
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
; GFX1032-NEXT: s_bcnt1_i32_b32 s1, s3
|
||||
; GFX1032-NEXT: s_mov_b32 s11, 0x31016000
|
||||
|
@ -323,7 +323,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX1032-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: buffer_gl1_inv
|
||||
; GFX1032-NEXT: BB1_2:
|
||||
; GFX1032-NEXT: .LBB1_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s0
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -391,7 +391,7 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX8-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX8-NEXT: s_cbranch_execz BB2_2
|
||||
; GFX8-NEXT: s_cbranch_execz .LBB2_2
|
||||
; GFX8-NEXT: ; %bb.1:
|
||||
; GFX8-NEXT: s_mov_b32 s11, 0xf000
|
||||
; GFX8-NEXT: s_mov_b32 s10, -1
|
||||
|
@ -403,7 +403,7 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX8-NEXT: buffer_atomic_add v0, off, s[8:11], 0 glc
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol
|
||||
; GFX8-NEXT: BB2_2:
|
||||
; GFX8-NEXT: .LBB2_2:
|
||||
; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, v1
|
||||
|
@ -445,7 +445,7 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX9-NEXT: s_cbranch_execz BB2_2
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB2_2
|
||||
; GFX9-NEXT: ; %bb.1:
|
||||
; GFX9-NEXT: s_mov_b32 s11, 0xf000
|
||||
; GFX9-NEXT: s_mov_b32 s10, -1
|
||||
|
@ -457,7 +457,7 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX9-NEXT: buffer_atomic_add v0, off, s[8:11], 0 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: BB2_2:
|
||||
; GFX9-NEXT: .LBB2_2:
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v1
|
||||
|
@ -509,7 +509,7 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX1064-NEXT: s_mov_b32 s6, -1
|
||||
; GFX1064-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1064-NEXT: s_and_saveexec_b64 s[8:9], vcc
|
||||
; GFX1064-NEXT: s_cbranch_execz BB2_2
|
||||
; GFX1064-NEXT: s_cbranch_execz .LBB2_2
|
||||
; GFX1064-NEXT: ; %bb.1:
|
||||
; GFX1064-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX1064-NEXT: s_mov_b32 s7, 0x31016000
|
||||
|
@ -522,7 +522,7 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX1064-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: buffer_gl1_inv
|
||||
; GFX1064-NEXT: BB2_2:
|
||||
; GFX1064-NEXT: .LBB2_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1064-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -565,7 +565,7 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX1032-NEXT: s_mov_b32 s6, -1
|
||||
; GFX1032-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s8, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz BB2_2
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB2_2
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
; GFX1032-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX1032-NEXT: s_mov_b32 s7, 0x31016000
|
||||
|
@ -578,7 +578,7 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX1032-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: buffer_gl1_inv
|
||||
; GFX1032-NEXT: BB2_2:
|
||||
; GFX1032-NEXT: .LBB2_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s8
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -606,7 +606,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out, i64 addrspac
|
|||
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; GFX7LESS-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX7LESS-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX7LESS-NEXT: s_cbranch_execz BB3_2
|
||||
; GFX7LESS-NEXT: s_cbranch_execz .LBB3_2
|
||||
; GFX7LESS-NEXT: ; %bb.1:
|
||||
; GFX7LESS-NEXT: s_mov_b32 s11, 0xf000
|
||||
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
|
||||
|
@ -621,7 +621,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out, i64 addrspac
|
|||
; GFX7LESS-NEXT: buffer_atomic_add_x2 v[0:1], off, s[8:11], 0 glc
|
||||
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7LESS-NEXT: buffer_wbinvl1
|
||||
; GFX7LESS-NEXT: BB3_2:
|
||||
; GFX7LESS-NEXT: .LBB3_2:
|
||||
; GFX7LESS-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000
|
||||
|
@ -646,7 +646,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out, i64 addrspac
|
|||
; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; GFX89-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX89-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX89-NEXT: s_cbranch_execz BB3_2
|
||||
; GFX89-NEXT: s_cbranch_execz .LBB3_2
|
||||
; GFX89-NEXT: ; %bb.1:
|
||||
; GFX89-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX89-NEXT: s_mov_b32 s8, s2
|
||||
|
@ -661,7 +661,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out, i64 addrspac
|
|||
; GFX89-NEXT: buffer_atomic_add_x2 v[0:1], off, s[8:11], 0 glc
|
||||
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX89-NEXT: buffer_wbinvl1_vol
|
||||
; GFX89-NEXT: BB3_2:
|
||||
; GFX89-NEXT: .LBB3_2:
|
||||
; GFX89-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX89-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX89-NEXT: v_readfirstlane_b32 s2, v0
|
||||
|
@ -684,7 +684,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out, i64 addrspac
|
|||
; GFX1064-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX1064-NEXT: s_cbranch_execz BB3_2
|
||||
; GFX1064-NEXT: s_cbranch_execz .LBB3_2
|
||||
; GFX1064-NEXT: ; %bb.1:
|
||||
; GFX1064-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
|
||||
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
|
||||
|
@ -701,7 +701,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out, i64 addrspac
|
|||
; GFX1064-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: buffer_gl1_inv
|
||||
; GFX1064-NEXT: BB3_2:
|
||||
; GFX1064-NEXT: .LBB3_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -721,7 +721,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out, i64 addrspac
|
|||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v2, s5, 0
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz BB3_2
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB3_2
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
; GFX1032-NEXT: s_bcnt1_i32_b32 s5, s5
|
||||
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
|
||||
|
@ -738,7 +738,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out, i64 addrspac
|
|||
; GFX1032-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: buffer_gl1_inv
|
||||
; GFX1032-NEXT: BB3_2:
|
||||
; GFX1032-NEXT: .LBB3_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -766,7 +766,7 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
|
|||
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; GFX7LESS-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX7LESS-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX7LESS-NEXT: s_cbranch_execz BB4_2
|
||||
; GFX7LESS-NEXT: s_cbranch_execz .LBB4_2
|
||||
; GFX7LESS-NEXT: ; %bb.1:
|
||||
; GFX7LESS-NEXT: s_mov_b32 s15, 0xf000
|
||||
; GFX7LESS-NEXT: s_mov_b32 s14, -1
|
||||
|
@ -784,7 +784,7 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
|
|||
; GFX7LESS-NEXT: buffer_atomic_add_x2 v[0:1], off, s[12:15], 0 glc
|
||||
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7LESS-NEXT: buffer_wbinvl1
|
||||
; GFX7LESS-NEXT: BB4_2:
|
||||
; GFX7LESS-NEXT: .LBB4_2:
|
||||
; GFX7LESS-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7LESS-NEXT: s_mov_b32 s7, 0xf000
|
||||
|
@ -812,7 +812,7 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
|
|||
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; GFX8-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX8-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX8-NEXT: s_cbranch_execz BB4_2
|
||||
; GFX8-NEXT: s_cbranch_execz .LBB4_2
|
||||
; GFX8-NEXT: ; %bb.1:
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: s_mov_b32 s12, s6
|
||||
|
@ -830,7 +830,7 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
|
|||
; GFX8-NEXT: buffer_atomic_add_x2 v[0:1], off, s[12:15], 0 glc
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol
|
||||
; GFX8-NEXT: BB4_2:
|
||||
; GFX8-NEXT: .LBB4_2:
|
||||
; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX8-NEXT: v_readfirstlane_b32 s2, v0
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -857,7 +857,7 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
|
|||
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX9-NEXT: s_cbranch_execz BB4_2
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB4_2
|
||||
; GFX9-NEXT: ; %bb.1:
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_mov_b32 s12, s6
|
||||
|
@ -875,7 +875,7 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
|
|||
; GFX9-NEXT: buffer_atomic_add_x2 v[0:1], off, s[12:15], 0 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: BB4_2:
|
||||
; GFX9-NEXT: .LBB4_2:
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: v_mul_lo_u32 v3, s3, v2
|
||||
|
@ -903,7 +903,7 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
|
|||
; GFX1064-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX1064-NEXT: s_cbranch_execz BB4_2
|
||||
; GFX1064-NEXT: s_cbranch_execz .LBB4_2
|
||||
; GFX1064-NEXT: ; %bb.1:
|
||||
; GFX1064-NEXT: s_bcnt1_i32_b64 s8, s[8:9]
|
||||
; GFX1064-NEXT: s_mov_b32 s11, 0x31016000
|
||||
|
@ -923,7 +923,7 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
|
|||
; GFX1064-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: buffer_gl1_inv
|
||||
; GFX1064-NEXT: BB4_2:
|
||||
; GFX1064-NEXT: .LBB4_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1064-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -950,7 +950,7 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
|
|||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v2, s8, 0
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz BB4_2
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB4_2
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
; GFX1032-NEXT: s_bcnt1_i32_b32 s1, s8
|
||||
; GFX1032-NEXT: s_mov_b32 s11, 0x31016000
|
||||
|
@ -970,7 +970,7 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
|
|||
; GFX1032-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: buffer_gl1_inv
|
||||
; GFX1032-NEXT: BB4_2:
|
||||
; GFX1032-NEXT: .LBB4_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s0
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -1072,7 +1072,7 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out, i32 addrspac
|
|||
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX7LESS-NEXT: ; implicit-def: $vgpr1
|
||||
; GFX7LESS-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX7LESS-NEXT: s_cbranch_execz BB6_2
|
||||
; GFX7LESS-NEXT: s_cbranch_execz .LBB6_2
|
||||
; GFX7LESS-NEXT: ; %bb.1:
|
||||
; GFX7LESS-NEXT: s_mov_b32 s11, 0xf000
|
||||
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
|
||||
|
@ -1086,7 +1086,7 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out, i32 addrspac
|
|||
; GFX7LESS-NEXT: buffer_atomic_sub v1, off, s[8:11], 0 glc
|
||||
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7LESS-NEXT: buffer_wbinvl1
|
||||
; GFX7LESS-NEXT: BB6_2:
|
||||
; GFX7LESS-NEXT: .LBB6_2:
|
||||
; GFX7LESS-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000
|
||||
|
@ -1106,7 +1106,7 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out, i32 addrspac
|
|||
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX8-NEXT: ; implicit-def: $vgpr1
|
||||
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX8-NEXT: s_cbranch_execz BB6_2
|
||||
; GFX8-NEXT: s_cbranch_execz .LBB6_2
|
||||
; GFX8-NEXT: ; %bb.1:
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: s_mov_b32 s8, s2
|
||||
|
@ -1120,7 +1120,7 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out, i32 addrspac
|
|||
; GFX8-NEXT: buffer_atomic_sub v1, off, s[8:11], 0 glc
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol
|
||||
; GFX8-NEXT: BB6_2:
|
||||
; GFX8-NEXT: .LBB6_2:
|
||||
; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX8-NEXT: v_readfirstlane_b32 s4, v1
|
||||
; GFX8-NEXT: v_mul_u32_u24_e32 v0, 5, v0
|
||||
|
@ -1140,7 +1140,7 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out, i32 addrspac
|
|||
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr1
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX9-NEXT: s_cbranch_execz BB6_2
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB6_2
|
||||
; GFX9-NEXT: ; %bb.1:
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_mov_b32 s8, s2
|
||||
|
@ -1154,7 +1154,7 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out, i32 addrspac
|
|||
; GFX9-NEXT: buffer_atomic_sub v1, off, s[8:11], 0 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: BB6_2:
|
||||
; GFX9-NEXT: .LBB6_2:
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s4, v1
|
||||
; GFX9-NEXT: v_mul_u32_u24_e32 v0, 5, v0
|
||||
|
@ -1174,7 +1174,7 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out, i32 addrspac
|
|||
; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s7, v0
|
||||
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX1064-NEXT: s_cbranch_execz BB6_2
|
||||
; GFX1064-NEXT: s_cbranch_execz .LBB6_2
|
||||
; GFX1064-NEXT: ; %bb.1:
|
||||
; GFX1064-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
|
||||
; GFX1064-NEXT: s_mov_b32 s11, 0x31016000
|
||||
|
@ -1190,7 +1190,7 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out, i32 addrspac
|
|||
; GFX1064-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: buffer_gl1_inv
|
||||
; GFX1064-NEXT: BB6_2:
|
||||
; GFX1064-NEXT: .LBB6_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -1210,7 +1210,7 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out, i32 addrspac
|
|||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s5, 0
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz BB6_2
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB6_2
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
; GFX1032-NEXT: s_bcnt1_i32_b32 s5, s5
|
||||
; GFX1032-NEXT: s_mov_b32 s11, 0x31016000
|
||||
|
@ -1226,7 +1226,7 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out, i32 addrspac
|
|||
; GFX1032-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: buffer_gl1_inv
|
||||
; GFX1032-NEXT: BB6_2:
|
||||
; GFX1032-NEXT: .LBB6_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -1254,7 +1254,7 @@ define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX7LESS-NEXT: ; implicit-def: $vgpr1
|
||||
; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX7LESS-NEXT: s_cbranch_execz BB7_2
|
||||
; GFX7LESS-NEXT: s_cbranch_execz .LBB7_2
|
||||
; GFX7LESS-NEXT: ; %bb.1:
|
||||
; GFX7LESS-NEXT: s_mov_b32 s15, 0xf000
|
||||
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
|
||||
|
@ -1268,7 +1268,7 @@ define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX7LESS-NEXT: buffer_atomic_sub v1, off, s[12:15], 0 glc
|
||||
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7LESS-NEXT: buffer_wbinvl1
|
||||
; GFX7LESS-NEXT: BB7_2:
|
||||
; GFX7LESS-NEXT: .LBB7_2:
|
||||
; GFX7LESS-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7LESS-NEXT: s_mov_b32 s7, 0xf000
|
||||
|
@ -1289,7 +1289,7 @@ define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX8-NEXT: ; implicit-def: $vgpr1
|
||||
; GFX8-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX8-NEXT: s_cbranch_execz BB7_2
|
||||
; GFX8-NEXT: s_cbranch_execz .LBB7_2
|
||||
; GFX8-NEXT: ; %bb.1:
|
||||
; GFX8-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -1303,7 +1303,7 @@ define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX8-NEXT: buffer_atomic_sub v1, off, s[12:15], 0 glc
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol
|
||||
; GFX8-NEXT: BB7_2:
|
||||
; GFX8-NEXT: .LBB7_2:
|
||||
; GFX8-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: v_mul_lo_u32 v0, s8, v0
|
||||
|
@ -1324,7 +1324,7 @@ define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr1
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX9-NEXT: s_cbranch_execz BB7_2
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB7_2
|
||||
; GFX9-NEXT: ; %bb.1:
|
||||
; GFX9-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -1338,7 +1338,7 @@ define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX9-NEXT: buffer_atomic_sub v1, off, s[12:15], 0 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: BB7_2:
|
||||
; GFX9-NEXT: .LBB7_2:
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: v_mul_lo_u32 v0, s8, v0
|
||||
|
@ -1360,7 +1360,7 @@ define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s3, v0
|
||||
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX1064-NEXT: s_cbranch_execz BB7_2
|
||||
; GFX1064-NEXT: s_cbranch_execz .LBB7_2
|
||||
; GFX1064-NEXT: ; %bb.1:
|
||||
; GFX1064-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
|
||||
; GFX1064-NEXT: s_mov_b32 s15, 0x31016000
|
||||
|
@ -1376,7 +1376,7 @@ define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX1064-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: buffer_gl1_inv
|
||||
; GFX1064-NEXT: BB7_2:
|
||||
; GFX1064-NEXT: .LBB7_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1064-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -1398,7 +1398,7 @@ define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s3, 0
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz BB7_2
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB7_2
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
; GFX1032-NEXT: s_bcnt1_i32_b32 s1, s3
|
||||
; GFX1032-NEXT: s_mov_b32 s11, 0x31016000
|
||||
|
@ -1414,7 +1414,7 @@ define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX1032-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: buffer_gl1_inv
|
||||
; GFX1032-NEXT: BB7_2:
|
||||
; GFX1032-NEXT: .LBB7_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s0
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -1482,7 +1482,7 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX8-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX8-NEXT: s_cbranch_execz BB8_2
|
||||
; GFX8-NEXT: s_cbranch_execz .LBB8_2
|
||||
; GFX8-NEXT: ; %bb.1:
|
||||
; GFX8-NEXT: s_mov_b32 s11, 0xf000
|
||||
; GFX8-NEXT: s_mov_b32 s10, -1
|
||||
|
@ -1494,7 +1494,7 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX8-NEXT: buffer_atomic_sub v0, off, s[8:11], 0 glc
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol
|
||||
; GFX8-NEXT: BB8_2:
|
||||
; GFX8-NEXT: .LBB8_2:
|
||||
; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, v1
|
||||
|
@ -1536,7 +1536,7 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX9-NEXT: s_cbranch_execz BB8_2
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB8_2
|
||||
; GFX9-NEXT: ; %bb.1:
|
||||
; GFX9-NEXT: s_mov_b32 s11, 0xf000
|
||||
; GFX9-NEXT: s_mov_b32 s10, -1
|
||||
|
@ -1548,7 +1548,7 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX9-NEXT: buffer_atomic_sub v0, off, s[8:11], 0 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: BB8_2:
|
||||
; GFX9-NEXT: .LBB8_2:
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v1
|
||||
|
@ -1600,7 +1600,7 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX1064-NEXT: s_mov_b32 s6, -1
|
||||
; GFX1064-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1064-NEXT: s_and_saveexec_b64 s[8:9], vcc
|
||||
; GFX1064-NEXT: s_cbranch_execz BB8_2
|
||||
; GFX1064-NEXT: s_cbranch_execz .LBB8_2
|
||||
; GFX1064-NEXT: ; %bb.1:
|
||||
; GFX1064-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX1064-NEXT: s_mov_b32 s7, 0x31016000
|
||||
|
@ -1613,7 +1613,7 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX1064-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: buffer_gl1_inv
|
||||
; GFX1064-NEXT: BB8_2:
|
||||
; GFX1064-NEXT: .LBB8_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1064-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -1656,7 +1656,7 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX1032-NEXT: s_mov_b32 s6, -1
|
||||
; GFX1032-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s8, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz BB8_2
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB8_2
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
; GFX1032-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX1032-NEXT: s_mov_b32 s7, 0x31016000
|
||||
|
@ -1669,7 +1669,7 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out, i32 addrspace
|
|||
; GFX1032-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: buffer_gl1_inv
|
||||
; GFX1032-NEXT: BB8_2:
|
||||
; GFX1032-NEXT: .LBB8_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s8
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -1697,7 +1697,7 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out, i64 addrspac
|
|||
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; GFX7LESS-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX7LESS-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX7LESS-NEXT: s_cbranch_execz BB9_2
|
||||
; GFX7LESS-NEXT: s_cbranch_execz .LBB9_2
|
||||
; GFX7LESS-NEXT: ; %bb.1:
|
||||
; GFX7LESS-NEXT: s_mov_b32 s11, 0xf000
|
||||
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
|
||||
|
@ -1712,7 +1712,7 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out, i64 addrspac
|
|||
; GFX7LESS-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[8:11], 0 glc
|
||||
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7LESS-NEXT: buffer_wbinvl1
|
||||
; GFX7LESS-NEXT: BB9_2:
|
||||
; GFX7LESS-NEXT: .LBB9_2:
|
||||
; GFX7LESS-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000
|
||||
|
@ -1737,7 +1737,7 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out, i64 addrspac
|
|||
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; GFX8-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX8-NEXT: s_cbranch_execz BB9_2
|
||||
; GFX8-NEXT: s_cbranch_execz .LBB9_2
|
||||
; GFX8-NEXT: ; %bb.1:
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: s_mov_b32 s8, s2
|
||||
|
@ -1752,7 +1752,7 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out, i64 addrspac
|
|||
; GFX8-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[8:11], 0 glc
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol
|
||||
; GFX8-NEXT: BB9_2:
|
||||
; GFX8-NEXT: .LBB9_2:
|
||||
; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GFX8-NEXT: v_readfirstlane_b32 s5, v1
|
||||
|
@ -1776,7 +1776,7 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out, i64 addrspac
|
|||
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX9-NEXT: s_cbranch_execz BB9_2
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB9_2
|
||||
; GFX9-NEXT: ; %bb.1:
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_mov_b32 s8, s2
|
||||
|
@ -1791,7 +1791,7 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out, i64 addrspac
|
|||
; GFX9-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[8:11], 0 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: BB9_2:
|
||||
; GFX9-NEXT: .LBB9_2:
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s5, v1
|
||||
|
@ -1815,7 +1815,7 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out, i64 addrspac
|
|||
; GFX1064-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX1064-NEXT: s_cbranch_execz BB9_2
|
||||
; GFX1064-NEXT: s_cbranch_execz .LBB9_2
|
||||
; GFX1064-NEXT: ; %bb.1:
|
||||
; GFX1064-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
|
||||
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
|
||||
|
@ -1832,7 +1832,7 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out, i64 addrspac
|
|||
; GFX1064-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: buffer_gl1_inv
|
||||
; GFX1064-NEXT: BB9_2:
|
||||
; GFX1064-NEXT: .LBB9_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -1855,7 +1855,7 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out, i64 addrspac
|
|||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v2, s5, 0
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz BB9_2
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB9_2
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
; GFX1032-NEXT: s_bcnt1_i32_b32 s5, s5
|
||||
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
|
||||
|
@ -1872,7 +1872,7 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out, i64 addrspac
|
|||
; GFX1032-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: buffer_gl1_inv
|
||||
; GFX1032-NEXT: BB9_2:
|
||||
; GFX1032-NEXT: .LBB9_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -1903,7 +1903,7 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
|
|||
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; GFX7LESS-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX7LESS-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX7LESS-NEXT: s_cbranch_execz BB10_2
|
||||
; GFX7LESS-NEXT: s_cbranch_execz .LBB10_2
|
||||
; GFX7LESS-NEXT: ; %bb.1:
|
||||
; GFX7LESS-NEXT: s_mov_b32 s15, 0xf000
|
||||
; GFX7LESS-NEXT: s_mov_b32 s14, -1
|
||||
|
@ -1921,7 +1921,7 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
|
|||
; GFX7LESS-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[12:15], 0 glc
|
||||
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7LESS-NEXT: buffer_wbinvl1
|
||||
; GFX7LESS-NEXT: BB10_2:
|
||||
; GFX7LESS-NEXT: .LBB10_2:
|
||||
; GFX7LESS-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7LESS-NEXT: s_mov_b32 s7, 0xf000
|
||||
|
@ -1949,7 +1949,7 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
|
|||
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; GFX8-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX8-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX8-NEXT: s_cbranch_execz BB10_2
|
||||
; GFX8-NEXT: s_cbranch_execz .LBB10_2
|
||||
; GFX8-NEXT: ; %bb.1:
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: s_mov_b32 s12, s6
|
||||
|
@ -1967,7 +1967,7 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
|
|||
; GFX8-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[12:15], 0 glc
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol
|
||||
; GFX8-NEXT: BB10_2:
|
||||
; GFX8-NEXT: .LBB10_2:
|
||||
; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX8-NEXT: v_readfirstlane_b32 s2, v0
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -1994,7 +1994,7 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
|
|||
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX9-NEXT: s_cbranch_execz BB10_2
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB10_2
|
||||
; GFX9-NEXT: ; %bb.1:
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_mov_b32 s12, s6
|
||||
|
@ -2012,7 +2012,7 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
|
|||
; GFX9-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[12:15], 0 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: BB10_2:
|
||||
; GFX9-NEXT: .LBB10_2:
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: v_mul_lo_u32 v3, s3, v2
|
||||
|
@ -2040,7 +2040,7 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
|
|||
; GFX1064-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX1064-NEXT: s_cbranch_execz BB10_2
|
||||
; GFX1064-NEXT: s_cbranch_execz .LBB10_2
|
||||
; GFX1064-NEXT: ; %bb.1:
|
||||
; GFX1064-NEXT: s_bcnt1_i32_b64 s8, s[8:9]
|
||||
; GFX1064-NEXT: s_mov_b32 s11, 0x31016000
|
||||
|
@ -2060,7 +2060,7 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
|
|||
; GFX1064-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: buffer_gl1_inv
|
||||
; GFX1064-NEXT: BB10_2:
|
||||
; GFX1064-NEXT: .LBB10_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1064-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -2087,7 +2087,7 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
|
|||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v2, s8, 0
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz BB10_2
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB10_2
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
; GFX1032-NEXT: s_bcnt1_i32_b32 s1, s8
|
||||
; GFX1032-NEXT: s_mov_b32 s11, 0x31016000
|
||||
|
@ -2107,7 +2107,7 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
|
|||
; GFX1032-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: buffer_gl1_inv
|
||||
; GFX1032-NEXT: BB10_2:
|
||||
; GFX1032-NEXT: .LBB10_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s0
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -17,7 +17,7 @@ define amdgpu_ps void @add_i32_constant(<4 x i32> inreg %out, <4 x i32> inreg %i
|
|||
; GFX7-NEXT: s_mov_b64 s[10:11], exec
|
||||
; GFX7-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX7-NEXT: s_and_saveexec_b64 s[8:9], s[10:11]
|
||||
; GFX7-NEXT: s_cbranch_execz BB0_4
|
||||
; GFX7-NEXT: s_cbranch_execz .LBB0_4
|
||||
; GFX7-NEXT: ; %bb.1:
|
||||
; GFX7-NEXT: s_mov_b64 s[12:13], exec
|
||||
; GFX7-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s12, 0
|
||||
|
@ -25,25 +25,25 @@ define amdgpu_ps void @add_i32_constant(<4 x i32> inreg %out, <4 x i32> inreg %i
|
|||
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX7-NEXT: ; implicit-def: $vgpr1
|
||||
; GFX7-NEXT: s_and_saveexec_b64 s[10:11], vcc
|
||||
; GFX7-NEXT: s_cbranch_execz BB0_3
|
||||
; GFX7-NEXT: s_cbranch_execz .LBB0_3
|
||||
; GFX7-NEXT: ; %bb.2:
|
||||
; GFX7-NEXT: s_bcnt1_i32_b64 s12, s[12:13]
|
||||
; GFX7-NEXT: s_mul_i32 s12, s12, 5
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s12
|
||||
; GFX7-NEXT: buffer_atomic_add v1, off, s[4:7], 0 glc
|
||||
; GFX7-NEXT: BB0_3:
|
||||
; GFX7-NEXT: .LBB0_3:
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[10:11]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: v_readfirstlane_b32 s4, v1
|
||||
; GFX7-NEXT: v_mad_u32_u24 v0, v0, 5, s4
|
||||
; GFX7-NEXT: BB0_4: ; %Flow
|
||||
; GFX7-NEXT: .LBB0_4: ; %Flow
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_wqm_b64 s[4:5], -1
|
||||
; GFX7-NEXT: s_andn2_b64 vcc, exec, s[4:5]
|
||||
; GFX7-NEXT: s_cbranch_vccnz BB0_6
|
||||
; GFX7-NEXT: s_cbranch_vccnz .LBB0_6
|
||||
; GFX7-NEXT: ; %bb.5: ; %if
|
||||
; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX7-NEXT: BB0_6: ; %UnifiedReturnBlock
|
||||
; GFX7-NEXT: .LBB0_6: ; %UnifiedReturnBlock
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
; GFX89-LABEL: add_i32_constant:
|
||||
|
@ -51,7 +51,7 @@ define amdgpu_ps void @add_i32_constant(<4 x i32> inreg %out, <4 x i32> inreg %i
|
|||
; GFX89-NEXT: s_mov_b64 s[10:11], exec
|
||||
; GFX89-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX89-NEXT: s_and_saveexec_b64 s[8:9], s[10:11]
|
||||
; GFX89-NEXT: s_cbranch_execz BB0_4
|
||||
; GFX89-NEXT: s_cbranch_execz .LBB0_4
|
||||
; GFX89-NEXT: ; %bb.1:
|
||||
; GFX89-NEXT: s_mov_b64 s[12:13], exec
|
||||
; GFX89-NEXT: v_mbcnt_lo_u32_b32 v0, s12, 0
|
||||
|
@ -59,25 +59,25 @@ define amdgpu_ps void @add_i32_constant(<4 x i32> inreg %out, <4 x i32> inreg %i
|
|||
; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX89-NEXT: ; implicit-def: $vgpr1
|
||||
; GFX89-NEXT: s_and_saveexec_b64 s[10:11], vcc
|
||||
; GFX89-NEXT: s_cbranch_execz BB0_3
|
||||
; GFX89-NEXT: s_cbranch_execz .LBB0_3
|
||||
; GFX89-NEXT: ; %bb.2:
|
||||
; GFX89-NEXT: s_bcnt1_i32_b64 s12, s[12:13]
|
||||
; GFX89-NEXT: s_mul_i32 s12, s12, 5
|
||||
; GFX89-NEXT: v_mov_b32_e32 v1, s12
|
||||
; GFX89-NEXT: buffer_atomic_add v1, off, s[4:7], 0 glc
|
||||
; GFX89-NEXT: BB0_3:
|
||||
; GFX89-NEXT: .LBB0_3:
|
||||
; GFX89-NEXT: s_or_b64 exec, exec, s[10:11]
|
||||
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX89-NEXT: v_readfirstlane_b32 s4, v1
|
||||
; GFX89-NEXT: v_mad_u32_u24 v0, v0, 5, s4
|
||||
; GFX89-NEXT: BB0_4: ; %Flow
|
||||
; GFX89-NEXT: .LBB0_4: ; %Flow
|
||||
; GFX89-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX89-NEXT: s_wqm_b64 s[4:5], -1
|
||||
; GFX89-NEXT: s_andn2_b64 vcc, exec, s[4:5]
|
||||
; GFX89-NEXT: s_cbranch_vccnz BB0_6
|
||||
; GFX89-NEXT: s_cbranch_vccnz .LBB0_6
|
||||
; GFX89-NEXT: ; %bb.5: ; %if
|
||||
; GFX89-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX89-NEXT: BB0_6: ; %UnifiedReturnBlock
|
||||
; GFX89-NEXT: .LBB0_6: ; %UnifiedReturnBlock
|
||||
; GFX89-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1064-LABEL: add_i32_constant:
|
||||
|
@ -85,7 +85,7 @@ define amdgpu_ps void @add_i32_constant(<4 x i32> inreg %out, <4 x i32> inreg %i
|
|||
; GFX1064-NEXT: s_mov_b64 s[10:11], exec
|
||||
; GFX1064-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1064-NEXT: s_and_saveexec_b64 s[8:9], s[10:11]
|
||||
; GFX1064-NEXT: s_cbranch_execz BB0_4
|
||||
; GFX1064-NEXT: s_cbranch_execz .LBB0_4
|
||||
; GFX1064-NEXT: ; %bb.1:
|
||||
; GFX1064-NEXT: s_mov_b64 s[12:13], exec
|
||||
; GFX1064-NEXT: ; implicit-def: $vgpr1
|
||||
|
@ -93,26 +93,26 @@ define amdgpu_ps void @add_i32_constant(<4 x i32> inreg %out, <4 x i32> inreg %i
|
|||
; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s13, v0
|
||||
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX1064-NEXT: s_and_saveexec_b64 s[10:11], vcc
|
||||
; GFX1064-NEXT: s_cbranch_execz BB0_3
|
||||
; GFX1064-NEXT: s_cbranch_execz .LBB0_3
|
||||
; GFX1064-NEXT: ; %bb.2:
|
||||
; GFX1064-NEXT: s_bcnt1_i32_b64 s12, s[12:13]
|
||||
; GFX1064-NEXT: s_mul_i32 s12, s12, 5
|
||||
; GFX1064-NEXT: v_mov_b32_e32 v1, s12
|
||||
; GFX1064-NEXT: buffer_atomic_add v1, off, s[4:7], 0 glc
|
||||
; GFX1064-NEXT: BB0_3:
|
||||
; GFX1064-NEXT: .LBB0_3:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1064-NEXT: s_or_b64 exec, exec, s[10:11]
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1064-NEXT: v_readfirstlane_b32 s4, v1
|
||||
; GFX1064-NEXT: v_mad_u32_u24 v0, v0, 5, s4
|
||||
; GFX1064-NEXT: BB0_4: ; %Flow
|
||||
; GFX1064-NEXT: .LBB0_4: ; %Flow
|
||||
; GFX1064-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX1064-NEXT: s_wqm_b64 s[4:5], -1
|
||||
; GFX1064-NEXT: s_andn2_b64 vcc, exec, s[4:5]
|
||||
; GFX1064-NEXT: s_cbranch_vccnz BB0_6
|
||||
; GFX1064-NEXT: s_cbranch_vccnz .LBB0_6
|
||||
; GFX1064-NEXT: ; %bb.5: ; %if
|
||||
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1064-NEXT: BB0_6: ; %UnifiedReturnBlock
|
||||
; GFX1064-NEXT: .LBB0_6: ; %UnifiedReturnBlock
|
||||
; GFX1064-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1032-LABEL: add_i32_constant:
|
||||
|
@ -120,33 +120,33 @@ define amdgpu_ps void @add_i32_constant(<4 x i32> inreg %out, <4 x i32> inreg %i
|
|||
; GFX1032-NEXT: s_mov_b32 s9, exec_lo
|
||||
; GFX1032-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s8, s9
|
||||
; GFX1032-NEXT: s_cbranch_execz BB0_4
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB0_4
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
; GFX1032-NEXT: s_mov_b32 s10, exec_lo
|
||||
; GFX1032-NEXT: ; implicit-def: $vgpr1
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s10, 0
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s9, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz BB0_3
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB0_3
|
||||
; GFX1032-NEXT: ; %bb.2:
|
||||
; GFX1032-NEXT: s_bcnt1_i32_b32 s10, s10
|
||||
; GFX1032-NEXT: s_mul_i32 s10, s10, 5
|
||||
; GFX1032-NEXT: v_mov_b32_e32 v1, s10
|
||||
; GFX1032-NEXT: buffer_atomic_add v1, off, s[4:7], 0 glc
|
||||
; GFX1032-NEXT: BB0_3:
|
||||
; GFX1032-NEXT: .LBB0_3:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s9
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1032-NEXT: v_readfirstlane_b32 s4, v1
|
||||
; GFX1032-NEXT: v_mad_u32_u24 v0, v0, 5, s4
|
||||
; GFX1032-NEXT: BB0_4: ; %Flow
|
||||
; GFX1032-NEXT: .LBB0_4: ; %Flow
|
||||
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s8
|
||||
; GFX1032-NEXT: s_wqm_b32 s4, -1
|
||||
; GFX1032-NEXT: s_andn2_b32 vcc_lo, exec_lo, s4
|
||||
; GFX1032-NEXT: s_cbranch_vccnz BB0_6
|
||||
; GFX1032-NEXT: s_cbranch_vccnz .LBB0_6
|
||||
; GFX1032-NEXT: ; %bb.5: ; %if
|
||||
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1032-NEXT: BB0_6: ; %UnifiedReturnBlock
|
||||
; GFX1032-NEXT: .LBB0_6: ; %UnifiedReturnBlock
|
||||
; GFX1032-NEXT: s_endpgm
|
||||
entry:
|
||||
%cond1 = call i1 @llvm.amdgcn.wqm.vote(i1 true)
|
||||
|
@ -168,11 +168,11 @@ define amdgpu_ps void @add_i32_varying(<4 x i32> inreg %out, <4 x i32> inreg %in
|
|||
; GFX7-NEXT: s_wqm_b64 s[8:9], -1
|
||||
; GFX7-NEXT: buffer_atomic_add v0, off, s[4:7], 0 glc
|
||||
; GFX7-NEXT: s_andn2_b64 vcc, exec, s[8:9]
|
||||
; GFX7-NEXT: s_cbranch_vccnz BB1_2
|
||||
; GFX7-NEXT: s_cbranch_vccnz .LBB1_2
|
||||
; GFX7-NEXT: ; %bb.1: ; %if
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX7-NEXT: BB1_2: ; %else
|
||||
; GFX7-NEXT: .LBB1_2: ; %else
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
; GFX8-LABEL: add_i32_varying:
|
||||
|
@ -182,7 +182,7 @@ define amdgpu_ps void @add_i32_varying(<4 x i32> inreg %out, <4 x i32> inreg %in
|
|||
; GFX8-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX8-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX8-NEXT: s_and_saveexec_b64 s[8:9], s[10:11]
|
||||
; GFX8-NEXT: s_cbranch_execz BB1_4
|
||||
; GFX8-NEXT: s_cbranch_execz .LBB1_4
|
||||
; GFX8-NEXT: ; %bb.1:
|
||||
; GFX8-NEXT: s_or_saveexec_b64 s[10:11], -1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, 0
|
||||
|
@ -211,24 +211,24 @@ define amdgpu_ps void @add_i32_varying(<4 x i32> inreg %out, <4 x i32> inreg %in
|
|||
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX8-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX8-NEXT: s_and_saveexec_b64 s[10:11], vcc
|
||||
; GFX8-NEXT: s_cbranch_execz BB1_3
|
||||
; GFX8-NEXT: s_cbranch_execz .LBB1_3
|
||||
; GFX8-NEXT: ; %bb.2:
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, s12
|
||||
; GFX8-NEXT: buffer_atomic_add v0, off, s[4:7], 0 glc
|
||||
; GFX8-NEXT: BB1_3:
|
||||
; GFX8-NEXT: .LBB1_3:
|
||||
; GFX8-NEXT: s_or_b64 exec, exec, s[10:11]
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, v1
|
||||
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s4, v0
|
||||
; GFX8-NEXT: BB1_4: ; %Flow
|
||||
; GFX8-NEXT: .LBB1_4: ; %Flow
|
||||
; GFX8-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX8-NEXT: s_wqm_b64 s[4:5], -1
|
||||
; GFX8-NEXT: s_andn2_b64 vcc, exec, s[4:5]
|
||||
; GFX8-NEXT: s_cbranch_vccnz BB1_6
|
||||
; GFX8-NEXT: s_cbranch_vccnz .LBB1_6
|
||||
; GFX8-NEXT: ; %bb.5: ; %if
|
||||
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX8-NEXT: BB1_6: ; %UnifiedReturnBlock
|
||||
; GFX8-NEXT: .LBB1_6: ; %UnifiedReturnBlock
|
||||
; GFX8-NEXT: s_endpgm
|
||||
;
|
||||
; GFX9-LABEL: add_i32_varying:
|
||||
|
@ -238,7 +238,7 @@ define amdgpu_ps void @add_i32_varying(<4 x i32> inreg %out, <4 x i32> inreg %in
|
|||
; GFX9-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[8:9], s[10:11]
|
||||
; GFX9-NEXT: s_cbranch_execz BB1_4
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB1_4
|
||||
; GFX9-NEXT: ; %bb.1:
|
||||
; GFX9-NEXT: s_or_saveexec_b64 s[10:11], -1
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0
|
||||
|
@ -267,24 +267,24 @@ define amdgpu_ps void @add_i32_varying(<4 x i32> inreg %out, <4 x i32> inreg %in
|
|||
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[10:11], vcc
|
||||
; GFX9-NEXT: s_cbranch_execz BB1_3
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB1_3
|
||||
; GFX9-NEXT: ; %bb.2:
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, s12
|
||||
; GFX9-NEXT: buffer_atomic_add v0, off, s[4:7], 0 glc
|
||||
; GFX9-NEXT: BB1_3:
|
||||
; GFX9-NEXT: .LBB1_3:
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[10:11]
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v1
|
||||
; GFX9-NEXT: v_add_u32_e32 v0, s4, v0
|
||||
; GFX9-NEXT: BB1_4: ; %Flow
|
||||
; GFX9-NEXT: .LBB1_4: ; %Flow
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX9-NEXT: s_wqm_b64 s[4:5], -1
|
||||
; GFX9-NEXT: s_andn2_b64 vcc, exec, s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_vccnz BB1_6
|
||||
; GFX9-NEXT: s_cbranch_vccnz .LBB1_6
|
||||
; GFX9-NEXT: ; %bb.5: ; %if
|
||||
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX9-NEXT: BB1_6: ; %UnifiedReturnBlock
|
||||
; GFX9-NEXT: .LBB1_6: ; %UnifiedReturnBlock
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1064-LABEL: add_i32_varying:
|
||||
|
@ -294,7 +294,7 @@ define amdgpu_ps void @add_i32_varying(<4 x i32> inreg %out, <4 x i32> inreg %in
|
|||
; GFX1064-NEXT: s_mov_b64 s[10:11], s[8:9]
|
||||
; GFX1064-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1064-NEXT: s_and_saveexec_b64 s[8:9], s[10:11]
|
||||
; GFX1064-NEXT: s_cbranch_execz BB1_4
|
||||
; GFX1064-NEXT: s_cbranch_execz .LBB1_4
|
||||
; GFX1064-NEXT: ; %bb.1:
|
||||
; GFX1064-NEXT: s_not_b64 exec, exec
|
||||
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
|
||||
|
@ -329,25 +329,25 @@ define amdgpu_ps void @add_i32_varying(<4 x i32> inreg %out, <4 x i32> inreg %in
|
|||
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX1064-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1064-NEXT: s_and_saveexec_b64 s[10:11], vcc
|
||||
; GFX1064-NEXT: s_cbranch_execz BB1_3
|
||||
; GFX1064-NEXT: s_cbranch_execz .LBB1_3
|
||||
; GFX1064-NEXT: ; %bb.2:
|
||||
; GFX1064-NEXT: v_mov_b32_e32 v0, s12
|
||||
; GFX1064-NEXT: buffer_atomic_add v0, off, s[4:7], 0 glc
|
||||
; GFX1064-NEXT: BB1_3:
|
||||
; GFX1064-NEXT: .LBB1_3:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1064-NEXT: s_or_b64 exec, exec, s[10:11]
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1064-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GFX1064-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX1064-NEXT: v_add_nc_u32_e32 v0, s4, v0
|
||||
; GFX1064-NEXT: BB1_4: ; %Flow
|
||||
; GFX1064-NEXT: .LBB1_4: ; %Flow
|
||||
; GFX1064-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX1064-NEXT: s_wqm_b64 s[4:5], -1
|
||||
; GFX1064-NEXT: s_andn2_b64 vcc, exec, s[4:5]
|
||||
; GFX1064-NEXT: s_cbranch_vccnz BB1_6
|
||||
; GFX1064-NEXT: s_cbranch_vccnz .LBB1_6
|
||||
; GFX1064-NEXT: ; %bb.5: ; %if
|
||||
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1064-NEXT: BB1_6: ; %UnifiedReturnBlock
|
||||
; GFX1064-NEXT: .LBB1_6: ; %UnifiedReturnBlock
|
||||
; GFX1064-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1032-LABEL: add_i32_varying:
|
||||
|
@ -357,7 +357,7 @@ define amdgpu_ps void @add_i32_varying(<4 x i32> inreg %out, <4 x i32> inreg %in
|
|||
; GFX1032-NEXT: s_mov_b32 s9, s8
|
||||
; GFX1032-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s8, s9
|
||||
; GFX1032-NEXT: s_cbranch_execz BB1_4
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB1_4
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
; GFX1032-NEXT: s_not_b32 exec_lo, exec_lo
|
||||
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
|
||||
|
@ -382,26 +382,26 @@ define amdgpu_ps void @add_i32_varying(<4 x i32> inreg %out, <4 x i32> inreg %in
|
|||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s9, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz BB1_3
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB1_3
|
||||
; GFX1032-NEXT: ; %bb.2:
|
||||
; GFX1032-NEXT: v_mov_b32_e32 v0, s11
|
||||
; GFX1032-NEXT: s_mov_b32 s10, s11
|
||||
; GFX1032-NEXT: buffer_atomic_add v0, off, s[4:7], 0 glc
|
||||
; GFX1032-NEXT: BB1_3:
|
||||
; GFX1032-NEXT: .LBB1_3:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s9
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1032-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GFX1032-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX1032-NEXT: v_add_nc_u32_e32 v0, s4, v0
|
||||
; GFX1032-NEXT: BB1_4: ; %Flow
|
||||
; GFX1032-NEXT: .LBB1_4: ; %Flow
|
||||
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s8
|
||||
; GFX1032-NEXT: s_wqm_b32 s4, -1
|
||||
; GFX1032-NEXT: s_andn2_b32 vcc_lo, exec_lo, s4
|
||||
; GFX1032-NEXT: s_cbranch_vccnz BB1_6
|
||||
; GFX1032-NEXT: s_cbranch_vccnz .LBB1_6
|
||||
; GFX1032-NEXT: ; %bb.5: ; %if
|
||||
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1032-NEXT: BB1_6: ; %UnifiedReturnBlock
|
||||
; GFX1032-NEXT: .LBB1_6: ; %UnifiedReturnBlock
|
||||
; GFX1032-NEXT: s_endpgm
|
||||
entry:
|
||||
%cond1 = call i1 @llvm.amdgcn.wqm.vote(i1 true)
|
||||
|
|
|
@ -7,7 +7,7 @@ define i32 @atomic_nand_i32_lds(i32 addrspace(3)* %ptr) nounwind {
|
|||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_read_b32 v1, v0
|
||||
; GCN-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GCN-NEXT: BB0_1: ; %atomicrmw.start
|
||||
; GCN-NEXT: .LBB0_1: ; %atomicrmw.start
|
||||
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, v1
|
||||
|
@ -19,7 +19,7 @@ define i32 @atomic_nand_i32_lds(i32 addrspace(3)* %ptr) nounwind {
|
|||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
||||
; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; GCN-NEXT: s_cbranch_execnz BB0_1
|
||||
; GCN-NEXT: s_cbranch_execnz .LBB0_1
|
||||
; GCN-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, v1
|
||||
|
@ -34,7 +34,7 @@ define i32 @atomic_nand_i32_global(i32 addrspace(1)* %ptr) nounwind {
|
|||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: global_load_dword v2, v[0:1], off
|
||||
; GCN-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GCN-NEXT: BB1_1: ; %atomicrmw.start
|
||||
; GCN-NEXT: .LBB1_1: ; %atomicrmw.start
|
||||
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, v2
|
||||
|
@ -47,7 +47,7 @@ define i32 @atomic_nand_i32_global(i32 addrspace(1)* %ptr) nounwind {
|
|||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
|
||||
; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; GCN-NEXT: s_cbranch_execnz BB1_1
|
||||
; GCN-NEXT: s_cbranch_execnz .LBB1_1
|
||||
; GCN-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, v2
|
||||
|
@ -62,7 +62,7 @@ define i32 @atomic_nand_i32_flat(i32* %ptr) nounwind {
|
|||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: flat_load_dword v2, v[0:1]
|
||||
; GCN-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GCN-NEXT: BB2_1: ; %atomicrmw.start
|
||||
; GCN-NEXT: .LBB2_1: ; %atomicrmw.start
|
||||
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, v2
|
||||
|
@ -75,7 +75,7 @@ define i32 @atomic_nand_i32_flat(i32* %ptr) nounwind {
|
|||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
|
||||
; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; GCN-NEXT: s_cbranch_execnz BB2_1
|
||||
; GCN-NEXT: s_cbranch_execnz .LBB2_1
|
||||
; GCN-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, v2
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
; GCN-LABEL: {{^}}test_branch:
|
||||
; GCNNOOPT: v_writelane_b32
|
||||
; GCNNOOPT: v_writelane_b32
|
||||
; GCN: s_cbranch_scc1 [[END:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_scc1 [[END:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCNNOOPT: v_readlane_b32
|
||||
; GCNNOOPT: v_readlane_b32
|
||||
|
@ -34,7 +34,7 @@ end:
|
|||
; GCNNOOPT: s_and_b32 s{{[0-9]+}}, [[VAL]], [[ONE]]
|
||||
; GCNOPT: s_bitcmp0_b32 [[VAL]], 0
|
||||
; GCNNOOPT: s_cmp_eq_u32
|
||||
; GCN: s_cbranch_scc1 [[END:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_scc1 [[END:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: buffer_store_dword
|
||||
|
||||
|
|
|
@ -14,12 +14,12 @@
|
|||
; GCN-DAG: v_cmp_lt_f32_e32 vcc,
|
||||
; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[OTHERCC]]
|
||||
; GCN: s_and_saveexec_b64 [[SAVED:s\[[0-9]+:[0-9]+\]]], [[AND]]
|
||||
; GCN-NEXT: s_cbranch_execz BB0_{{[0-9]+}}
|
||||
; GCN-NEXT: s_cbranch_execz .LBB0_{{[0-9]+}}
|
||||
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %bb4
|
||||
; GCN: ds_write_b32
|
||||
|
||||
; GCN: BB0_{{[0-9]+}}: ; %UnifiedReturnBlock
|
||||
; GCN: .LBB0_{{[0-9]+}}: ; %UnifiedReturnBlock
|
||||
; GCN-NEXT: s_endpgm
|
||||
; GCN-NEXT: .Lfunc_end
|
||||
define amdgpu_ps void @ham(float %arg, float %arg1) #0 {
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
declare void @func() #0
|
||||
|
||||
; GCN-LABEL: {{^}}bundle_size:
|
||||
; GCN: s_cbranch_scc0 [[BB_EXPANSION:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_scc0 [[BB_EXPANSION:.LBB[0-9]+_[0-9]+]]
|
||||
; GCN: s_getpc_b64
|
||||
; GCN-NEXT: .Lpost_getpc{{[0-9]+}}:{{$}}
|
||||
; GCN-NEXT: s_add_u32
|
||||
|
|
|
@ -323,18 +323,18 @@ define amdgpu_kernel void @spill(i32 addrspace(1)* %arg, i32 %cnd) #0 {
|
|||
; CHECK-NEXT: ;;#ASMSTART
|
||||
; CHECK-NEXT: s_mov_b32 vcc_hi, 0
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: s_cbranch_scc0 BB0_1
|
||||
; CHECK-NEXT: BB0_3: ; %entry
|
||||
; CHECK-NEXT: s_cbranch_scc0 .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_3: ; %entry
|
||||
; CHECK-NEXT: s_not_b64 exec, exec
|
||||
; CHECK-NEXT: buffer_store_dword v0, off, s[96:99], 0
|
||||
; CHECK-NEXT: v_writelane_b32 v0, s0, 0
|
||||
; CHECK-NEXT: v_writelane_b32 v0, s1, 1
|
||||
; CHECK-NEXT: s_getpc_b64 s[0:1]
|
||||
; CHECK-NEXT: .Lpost_getpc0:
|
||||
; CHECK-NEXT: s_add_u32 s0, s0, (BB0_4-.Lpost_getpc0)&4294967295
|
||||
; CHECK-NEXT: s_addc_u32 s1, s1, (BB0_4-.Lpost_getpc0)>>32
|
||||
; CHECK-NEXT: s_add_u32 s0, s0, (.LBB0_4-.Lpost_getpc0)&4294967295
|
||||
; CHECK-NEXT: s_addc_u32 s1, s1, (.LBB0_4-.Lpost_getpc0)>>32
|
||||
; CHECK-NEXT: s_setpc_b64 s[0:1]
|
||||
; CHECK-NEXT: BB0_1: ; %bb2
|
||||
; CHECK-NEXT: .LBB0_1: ; %bb2
|
||||
; CHECK-NEXT: ;;#ASMSTART
|
||||
; CHECK-NEXT: v_nop_e64
|
||||
; CHECK-NEXT: v_nop_e64
|
||||
|
@ -345,13 +345,13 @@ define amdgpu_kernel void @spill(i32 addrspace(1)* %arg, i32 %cnd) #0 {
|
|||
; CHECK-NEXT: v_nop_e64
|
||||
; CHECK-NEXT: v_nop_e64
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: s_branch BB0_2
|
||||
; CHECK-NEXT: BB0_4: ; %bb3
|
||||
; CHECK-NEXT: s_branch .LBB0_2
|
||||
; CHECK-NEXT: .LBB0_4: ; %bb3
|
||||
; CHECK-NEXT: v_readlane_b32 s0, v0, 0
|
||||
; CHECK-NEXT: v_readlane_b32 s1, v0, 1
|
||||
; CHECK-NEXT: buffer_load_dword v0, off, s[96:99], 0
|
||||
; CHECK-NEXT: s_not_b64 exec, exec
|
||||
; CHECK-NEXT: BB0_2: ; %bb3
|
||||
; CHECK-NEXT: .LBB0_2: ; %bb3
|
||||
; CHECK-NEXT: ;;#ASMSTART
|
||||
; CHECK-NEXT: ; reg use s0
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
|
@ -1289,18 +1289,18 @@ define void @spill_func(i32 addrspace(1)* %arg) #0 {
|
|||
; CHECK-NEXT: ;;#ASMSTART
|
||||
; CHECK-NEXT: s_mov_b32 vcc_hi, 0
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: s_cbranch_scc0 BB1_1
|
||||
; CHECK-NEXT: BB1_3: ; %entry
|
||||
; CHECK-NEXT: s_cbranch_scc0 .LBB1_1
|
||||
; CHECK-NEXT: .LBB1_3: ; %entry
|
||||
; CHECK-NEXT: s_not_b64 exec, exec
|
||||
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
|
||||
; CHECK-NEXT: v_writelane_b32 v2, s0, 0
|
||||
; CHECK-NEXT: v_writelane_b32 v2, s1, 1
|
||||
; CHECK-NEXT: s_getpc_b64 s[0:1]
|
||||
; CHECK-NEXT: .Lpost_getpc1:
|
||||
; CHECK-NEXT: s_add_u32 s0, s0, (BB1_4-.Lpost_getpc1)&4294967295
|
||||
; CHECK-NEXT: s_addc_u32 s1, s1, (BB1_4-.Lpost_getpc1)>>32
|
||||
; CHECK-NEXT: s_add_u32 s0, s0, (.LBB1_4-.Lpost_getpc1)&4294967295
|
||||
; CHECK-NEXT: s_addc_u32 s1, s1, (.LBB1_4-.Lpost_getpc1)>>32
|
||||
; CHECK-NEXT: s_setpc_b64 s[0:1]
|
||||
; CHECK-NEXT: BB1_1: ; %bb2
|
||||
; CHECK-NEXT: .LBB1_1: ; %bb2
|
||||
; CHECK-NEXT: ;;#ASMSTART
|
||||
; CHECK-NEXT: v_nop_e64
|
||||
; CHECK-NEXT: v_nop_e64
|
||||
|
@ -1311,13 +1311,13 @@ define void @spill_func(i32 addrspace(1)* %arg) #0 {
|
|||
; CHECK-NEXT: v_nop_e64
|
||||
; CHECK-NEXT: v_nop_e64
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: s_branch BB1_2
|
||||
; CHECK-NEXT: BB1_4: ; %bb3
|
||||
; CHECK-NEXT: s_branch .LBB1_2
|
||||
; CHECK-NEXT: .LBB1_4: ; %bb3
|
||||
; CHECK-NEXT: v_readlane_b32 s0, v2, 0
|
||||
; CHECK-NEXT: v_readlane_b32 s1, v2, 1
|
||||
; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8
|
||||
; CHECK-NEXT: s_not_b64 exec, exec
|
||||
; CHECK-NEXT: BB1_2: ; %bb3
|
||||
; CHECK-NEXT: .LBB1_2: ; %bb3
|
||||
; CHECK-NEXT: ;;#ASMSTART
|
||||
; CHECK-NEXT: ; reg use s0
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
|
|
|
@ -4,13 +4,13 @@
|
|||
# block as the branch expansion.
|
||||
|
||||
# GCN-LABEL: long_branch_dbg_value:
|
||||
# GCN: BB0_5: ; %bb
|
||||
# GCN: .LBB0_5: ; %bb
|
||||
# GCN-NEXT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- [DW_OP_plus_uconst 12, DW_OP_stack_value]
|
||||
# GCN-NEXT: .loc 1 0 42 is_stmt 0 ; /tmp/test_debug_value.cl:0:42
|
||||
# GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
|
||||
# GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
|
||||
# GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], (BB0_4-[[POST_GETPC]])&4294967295
|
||||
# GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], (BB0_4-[[POST_GETPC]])>>32
|
||||
# GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], (.LBB0_4-[[POST_GETPC]])&4294967295
|
||||
# GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], (.LBB0_4-[[POST_GETPC]])>>32
|
||||
# GCN-NEXT: s_setpc_b64
|
||||
|
||||
--- |
|
||||
|
|
|
@ -6,14 +6,14 @@
|
|||
|
||||
; GCN-LABEL: long_forward_scc_branch_3f_offset_bug:
|
||||
; GFX1030: s_cmp_lg_u32
|
||||
; GFX1030-NEXT: s_cbranch_scc1 [[ENDBB:BB[0-9]+_[0-9]+]]
|
||||
; GFX1030-NEXT: s_cbranch_scc1 [[ENDBB:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GFX1010: s_cmp_lg_u32
|
||||
; GFX1010-NEXT: s_cbranch_scc0 [[RELAX_BB:BB[0-9]+_[0-9]+]]
|
||||
; GFX1010-NEXT: s_cbranch_scc0 [[RELAX_BB:.LBB[0-9]+_[0-9]+]]
|
||||
; GFX1010: s_getpc_b64
|
||||
; GFX1010-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
|
||||
; GFX1010-NEXT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB:BB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
|
||||
; GFX1010-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB:BB[0-9]+_[0-9]+]]-[[POST_GETPC]])>>32
|
||||
; GFX1010-NEXT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
|
||||
; GFX1010-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])>>32
|
||||
; GFX1010: [[RELAX_BB]]:
|
||||
|
||||
; GCN: v_nop
|
||||
|
@ -53,16 +53,16 @@ bb3:
|
|||
; GCN-LABEL: {{^}}long_forward_exec_branch_3f_offset_bug:
|
||||
; GFX1030: v_cmp_eq_u32
|
||||
; GFX1030: s_and_saveexec_b32
|
||||
; GFX1030-NEXT: s_cbranch_execnz [[RELAX_BB:BB[0-9]+_[0-9]+]]
|
||||
; GFX1030-NEXT: s_cbranch_execnz [[RELAX_BB:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GFX1010: v_cmp_eq_u32
|
||||
; GFX1010: s_and_saveexec_b32
|
||||
; GFX1010-NEXT: s_cbranch_execnz [[RELAX_BB:BB[0-9]+_[0-9]+]]
|
||||
; GFX1010-NEXT: s_cbranch_execnz [[RELAX_BB:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: s_getpc_b64
|
||||
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
|
||||
; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB:BB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
|
||||
; GCN-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB:BB[0-9]+_[0-9]+]]-[[POST_GETPC]])>>32
|
||||
; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
|
||||
; GCN-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])>>32
|
||||
; GCN: [[RELAX_BB]]:
|
||||
|
||||
; GCN: v_nop
|
||||
|
|
|
@ -21,7 +21,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
|
|||
; GCN-LABEL: {{^}}uniform_conditional_max_short_forward_branch:
|
||||
; GCN: s_load_dword [[CND:s[0-9]+]]
|
||||
; GCN: s_cmp_eq_u32 [[CND]], 0
|
||||
; GCN-NEXT: s_cbranch_scc1 [[BB3:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: s_cbranch_scc1 [[BB3:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
|
||||
; GCN-NEXT: ; %bb.1: ; %bb2
|
||||
|
@ -58,12 +58,12 @@ bb3:
|
|||
; GCN-LABEL: {{^}}uniform_conditional_min_long_forward_branch:
|
||||
; GCN: s_load_dword [[CND:s[0-9]+]]
|
||||
; GCN: s_cmp_eq_u32 [[CND]], 0
|
||||
; GCN-NEXT: s_cbranch_scc0 [[LONGBB:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: s_cbranch_scc0 [[LONGBB:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN-NEXT: {{BB[0-9]+_[0-9]+}}: ; %bb0
|
||||
; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %bb0
|
||||
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
|
||||
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
|
||||
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[ENDBB:BB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
|
||||
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[ENDBB:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
|
||||
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[ENDBB]]-[[POST_GETPC]])>>32
|
||||
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
|
||||
|
||||
|
@ -103,12 +103,12 @@ bb3:
|
|||
|
||||
; GCN-DAG: v_cmp_eq_f32_e64 [[UNMASKED:s\[[0-9]+:[0-9]+\]]], [[CND]], 0
|
||||
; GCN-DAG: s_and_b64 vcc, exec, [[UNMASKED]]
|
||||
; GCN: s_cbranch_vccz [[LONGBB:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_vccz [[LONGBB:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN-NEXT: {{BB[0-9]+_[0-9]+}}: ; %bb0
|
||||
; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %bb0
|
||||
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
|
||||
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
|
||||
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[ENDBB:BB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
|
||||
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[ENDBB:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
|
||||
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[ENDBB]]-[[POST_GETPC]])>>32
|
||||
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
|
||||
|
||||
|
@ -179,7 +179,7 @@ bb3:
|
|||
; GCN-LABEL: {{^}}long_backward_sbranch:
|
||||
; GCN: s_mov_b32 [[LOOPIDX:s[0-9]+]], 0{{$}}
|
||||
|
||||
; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]]: ; %bb2
|
||||
; GCN: .L[[LOOPBB:BB[0-9]+_[0-9]+]]: ; %bb2
|
||||
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: s_add_i32 [[INC:s[0-9]+]], [[LOOPIDX]], 1
|
||||
; GCN-NEXT: s_cmp_lt_i32 [[INC]], 10
|
||||
|
@ -190,15 +190,15 @@ bb3:
|
|||
; GCN-NEXT: v_nop_e64
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
|
||||
; GCN-NEXT: s_cbranch_scc0 [[ENDBB:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: s_cbranch_scc0 [[ENDBB:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN-NEXT: {{BB[0-9]+_[0-9]+}}: ; %bb2
|
||||
; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %bb2
|
||||
; GCN-NEXT: ; in Loop: Header=[[LOOPBB]] Depth=1
|
||||
|
||||
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
|
||||
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
|
||||
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[LOOPBB]]-[[POST_GETPC]])&4294967295
|
||||
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[LOOPBB]]-[[POST_GETPC]])>>32
|
||||
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], (.L[[LOOPBB]]-[[POST_GETPC]])&4294967295
|
||||
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], (.L[[LOOPBB]]-[[POST_GETPC]])>>32
|
||||
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
|
||||
|
||||
; GCN-NEXT: [[ENDBB]]:
|
||||
|
@ -227,13 +227,13 @@ bb3:
|
|||
|
||||
; GCN-LABEL: {{^}}uniform_unconditional_min_long_forward_branch:
|
||||
; GCN: s_cmp_eq_u32
|
||||
; GCN: s_cbranch_scc{{[0-1]}} [[BB2:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_scc{{[0-1]}} [[BB2:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN-NEXT: {{BB[0-9]+_[0-9]+}}: ; %bb0
|
||||
; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %bb0
|
||||
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
|
||||
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
|
||||
; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], ([[BB3:BB[0-9]_[0-9]+]]-[[POST_GETPC]])&4294967295
|
||||
; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], ([[BB3:BB[0-9]_[0-9]+]]-[[POST_GETPC]])>>32
|
||||
; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], ([[BB3:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])&4294967295
|
||||
; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], ([[BB3:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])>>32
|
||||
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC0_LO]]:[[PC0_HI]]{{\]}}
|
||||
|
||||
; GCN: [[BB2]]: ; %bb3
|
||||
|
@ -277,7 +277,7 @@ bb4:
|
|||
; GCN-LABEL: {{^}}uniform_unconditional_min_long_backward_branch:
|
||||
; GCN-NEXT: ; %bb.0: ; %entry
|
||||
|
||||
; GCN-NEXT: [[LOOP:BB[0-9]_[0-9]+]]: ; %loop
|
||||
; GCN-NEXT: .L[[LOOP:BB[0-9]_[0-9]+]]: ; %loop
|
||||
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: ;;#ASMSTART
|
||||
; GCN-NEXT: v_nop_e64
|
||||
|
@ -286,13 +286,13 @@ bb4:
|
|||
; GCN-NEXT: v_nop_e64
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
|
||||
; GCN-NEXT: {{BB[0-9]+_[0-9]+}}: ; %loop
|
||||
; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %loop
|
||||
; GCN-NEXT: ; in Loop: Header=[[LOOP]] Depth=1
|
||||
|
||||
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
|
||||
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
|
||||
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[LOOP]]-[[POST_GETPC]])&4294967295
|
||||
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[LOOP]]-[[POST_GETPC]])>>32
|
||||
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], (.L[[LOOP]]-[[POST_GETPC]])&4294967295
|
||||
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], (.L[[LOOP]]-[[POST_GETPC]])>>32
|
||||
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
|
||||
; GCN-NEXT: .Lfunc_end{{[0-9]+}}:
|
||||
define amdgpu_kernel void @uniform_unconditional_min_long_backward_branch(i32 addrspace(1)* %arg, i32 %arg1) {
|
||||
|
@ -322,13 +322,13 @@ loop:
|
|||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_cmp_lg_u32
|
||||
; GCN-NEXT: s_cselect_b64
|
||||
; GCN: s_cbranch_vccz [[BB2:BB[0-9]_[0-9]+]]
|
||||
; GCN: s_cbranch_vccz [[BB2:.LBB[0-9]_[0-9]+]]
|
||||
|
||||
; GCN-NEXT: {{BB[0-9]+_[0-9]+}}:
|
||||
; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}:
|
||||
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}}
|
||||
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
|
||||
; GCN-NEXT: s_add_u32 s[[PC1_LO]], s[[PC1_LO]], ([[BB3:BB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
|
||||
; GCN-NEXT: s_addc_u32 s[[PC1_HI]], s[[PC1_HI]], ([[BB3:BB[0-9]+_[0-9]+]]-[[POST_GETPC]])>>32
|
||||
; GCN-NEXT: s_add_u32 s[[PC1_LO]], s[[PC1_LO]], ([[BB3:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
|
||||
; GCN-NEXT: s_addc_u32 s[[PC1_HI]], s[[PC1_HI]], ([[BB3:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])>>32
|
||||
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC1_LO]]:[[PC1_HI]]{{\]}}
|
||||
|
||||
; GCN-NEXT: [[BB2]]: ; %bb2
|
||||
|
@ -381,19 +381,19 @@ bb3:
|
|||
; GCN-LABEL: {{^}}uniform_inside_divergent:
|
||||
; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
|
||||
; GCN-NEXT: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; GCN-NEXT: s_cbranch_execnz [[IF:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: s_cbranch_execnz [[IF:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN-NEXT: {{BB[0-9]+_[0-9]+}}: ; %entry
|
||||
; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %entry
|
||||
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
|
||||
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
|
||||
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[BB2:BB[0-9]_[0-9]+]]-[[POST_GETPC]])&4294967295
|
||||
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[BB2:BB[0-9]_[0-9]+]]-[[POST_GETPC]])>>32
|
||||
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[BB2:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])&4294967295
|
||||
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[BB2:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])>>32
|
||||
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
|
||||
|
||||
; GCN-NEXT: [[IF]]: ; %if
|
||||
; GCN: buffer_store_dword
|
||||
; GCN: s_cmp_lg_u32
|
||||
; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_scc1 [[ENDIF:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN-NEXT: ; %bb.2: ; %if_uniform
|
||||
; GCN: buffer_store_dword
|
||||
|
@ -435,7 +435,7 @@ endif:
|
|||
; GCN-NEXT: s_or_saveexec_b64 [[TEMP_MASK1:s\[[0-9]+:[0-9]+\]]], [[MASK]]
|
||||
; GCN-NEXT: s_xor_b64 exec, exec, [[TEMP_MASK1]]
|
||||
|
||||
; GCN: [[LOOP_BODY:BB[0-9]+_[0-9]+]]: ; %loop{{$}}
|
||||
; GCN: .L[[LOOP_BODY:BB[0-9]+_[0-9]+]]: ; %loop{{$}}
|
||||
; GCN: ;;#ASMSTART
|
||||
; GCN: v_nop_e64
|
||||
; GCN: v_nop_e64
|
||||
|
@ -444,14 +444,14 @@ endif:
|
|||
; GCN: v_nop_e64
|
||||
; GCN: v_nop_e64
|
||||
; GCN: ;;#ASMEND
|
||||
; GCN: s_cbranch_{{vccz|vccnz}} [[RET:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_{{vccz|vccnz}} [[RET:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN-NEXT: {{BB[0-9]+_[0-9]+}}: ; %loop
|
||||
; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %loop
|
||||
; GCN-NEXT: ; in Loop: Header=[[LOOP_BODY]] Depth=1
|
||||
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
|
||||
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
|
||||
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[LOOP_BODY]]-[[POST_GETPC]])&4294967295
|
||||
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[LOOP_BODY]]-[[POST_GETPC]])>>32
|
||||
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], (.L[[LOOP_BODY]]-[[POST_GETPC]])&4294967295
|
||||
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], (.L[[LOOP_BODY]]-[[POST_GETPC]])>>32
|
||||
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
|
||||
|
||||
; GCN-NEXT: [[RET]]: ; %UnifiedReturnBlock
|
||||
|
@ -485,12 +485,12 @@ ret:
|
|||
|
||||
; GCN-LABEL: {{^}}long_branch_hang:
|
||||
; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 6
|
||||
; GCN: s_cbranch_scc{{[0-1]}} [[LONG_BR_0:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_scc{{[0-1]}} [[LONG_BR_0:.LBB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: BB{{[0-9]+_[0-9]+}}:
|
||||
|
||||
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
|
||||
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
|
||||
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
|
||||
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[LONG_BR_DEST0:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
|
||||
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[LONG_BR_DEST0]]-[[POST_GETPC]])>>32
|
||||
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
|
||||
; GCN-NEXT: [[LONG_BR_0]]:
|
||||
|
|
|
@ -14,7 +14,7 @@ define i64 @sdiv64(i64 %a, i64 %b) {
|
|||
; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX9-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_execz BB0_2
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB0_2
|
||||
; GFX9-NEXT: ; %bb.1:
|
||||
; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v3
|
||||
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
|
||||
|
@ -135,10 +135,10 @@ define i64 @sdiv64(i64 %a, i64 %b) {
|
|||
; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v0, v2, vcc
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX9-NEXT: BB0_2: ; %Flow
|
||||
; GFX9-NEXT: .LBB0_2: ; %Flow
|
||||
; GFX9-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
|
||||
; GFX9-NEXT: s_xor_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_execz BB0_4
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB0_4
|
||||
; GFX9-NEXT: ; %bb.3:
|
||||
; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2
|
||||
; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2
|
||||
|
@ -160,7 +160,7 @@ define i64 @sdiv64(i64 %a, i64 %b) {
|
|||
; GFX9-NEXT: v_add_u32_e32 v3, 1, v1
|
||||
; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
|
||||
; GFX9-NEXT: v_cndmask_b32_e32 v4, v1, v3, vcc
|
||||
; GFX9-NEXT: BB0_4:
|
||||
; GFX9-NEXT: .LBB0_4:
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v5
|
||||
|
@ -179,7 +179,7 @@ define i64 @udiv64(i64 %a, i64 %b) {
|
|||
; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX9-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_execz BB1_2
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB1_2
|
||||
; GFX9-NEXT: ; %bb.1:
|
||||
; GFX9-NEXT: v_cvt_f32_u32_e32 v4, v2
|
||||
; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v3
|
||||
|
@ -285,10 +285,10 @@ define i64 @udiv64(i64 %a, i64 %b) {
|
|||
; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX9-NEXT: BB1_2: ; %Flow
|
||||
; GFX9-NEXT: .LBB1_2: ; %Flow
|
||||
; GFX9-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
|
||||
; GFX9-NEXT: s_xor_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_execz BB1_4
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB1_4
|
||||
; GFX9-NEXT: ; %bb.3:
|
||||
; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2
|
||||
; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2
|
||||
|
@ -310,7 +310,7 @@ define i64 @udiv64(i64 %a, i64 %b) {
|
|||
; GFX9-NEXT: v_add_u32_e32 v3, 1, v1
|
||||
; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
|
||||
; GFX9-NEXT: v_cndmask_b32_e32 v4, v1, v3, vcc
|
||||
; GFX9-NEXT: BB1_4:
|
||||
; GFX9-NEXT: .LBB1_4:
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v5
|
||||
|
@ -329,7 +329,7 @@ define i64 @srem64(i64 %a, i64 %b) {
|
|||
; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX9-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_execz BB2_2
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB2_2
|
||||
; GFX9-NEXT: ; %bb.1:
|
||||
; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v3
|
||||
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
|
||||
|
@ -448,10 +448,10 @@ define i64 @srem64(i64 %a, i64 %b) {
|
|||
; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v6, vcc
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX9-NEXT: BB2_2: ; %Flow
|
||||
; GFX9-NEXT: .LBB2_2: ; %Flow
|
||||
; GFX9-NEXT: s_or_saveexec_b64 s[4:5], s[8:9]
|
||||
; GFX9-NEXT: s_xor_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_execz BB2_4
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB2_4
|
||||
; GFX9-NEXT: ; %bb.3:
|
||||
; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2
|
||||
; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2
|
||||
|
@ -471,7 +471,7 @@ define i64 @srem64(i64 %a, i64 %b) {
|
|||
; GFX9-NEXT: v_sub_u32_e32 v1, v0, v2
|
||||
; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
|
||||
; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc
|
||||
; GFX9-NEXT: BB2_4:
|
||||
; GFX9-NEXT: .LBB2_4:
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v5
|
||||
|
@ -490,7 +490,7 @@ define i64 @urem64(i64 %a, i64 %b) {
|
|||
; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX9-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_execz BB3_2
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB3_2
|
||||
; GFX9-NEXT: ; %bb.1:
|
||||
; GFX9-NEXT: v_cvt_f32_u32_e32 v4, v2
|
||||
; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v3
|
||||
|
@ -595,10 +595,10 @@ define i64 @urem64(i64 %a, i64 %b) {
|
|||
; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX9-NEXT: BB3_2: ; %Flow
|
||||
; GFX9-NEXT: .LBB3_2: ; %Flow
|
||||
; GFX9-NEXT: s_or_saveexec_b64 s[4:5], s[8:9]
|
||||
; GFX9-NEXT: s_xor_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_execz BB3_4
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB3_4
|
||||
; GFX9-NEXT: ; %bb.3:
|
||||
; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2
|
||||
; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2
|
||||
|
@ -618,7 +618,7 @@ define i64 @urem64(i64 %a, i64 %b) {
|
|||
; GFX9-NEXT: v_sub_u32_e32 v1, v0, v2
|
||||
; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
|
||||
; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc
|
||||
; GFX9-NEXT: BB3_4:
|
||||
; GFX9-NEXT: .LBB3_4:
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v5
|
||||
|
@ -763,7 +763,7 @@ define <2 x i64> @sdivrem64(i64 %a, i64 %b) {
|
|||
; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX9-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_execz BB8_2
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB8_2
|
||||
; GFX9-NEXT: ; %bb.1:
|
||||
; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v3
|
||||
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
|
||||
|
@ -895,10 +895,10 @@ define <2 x i64> @sdivrem64(i64 %a, i64 %b) {
|
|||
; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v1, v7, vcc
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX9-NEXT: BB8_2: ; %Flow
|
||||
; GFX9-NEXT: .LBB8_2: ; %Flow
|
||||
; GFX9-NEXT: s_or_saveexec_b64 s[4:5], s[10:11]
|
||||
; GFX9-NEXT: s_xor_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_execz BB8_4
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB8_4
|
||||
; GFX9-NEXT: ; %bb.3:
|
||||
; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2
|
||||
; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2
|
||||
|
@ -923,7 +923,7 @@ define <2 x i64> @sdivrem64(i64 %a, i64 %b) {
|
|||
; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
|
||||
; GFX9-NEXT: v_cndmask_b32_e32 v6, v0, v3, vcc
|
||||
; GFX9-NEXT: v_cndmask_b32_e32 v4, v1, v4, vcc
|
||||
; GFX9-NEXT: BB8_4:
|
||||
; GFX9-NEXT: .LBB8_4:
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v5
|
||||
|
@ -948,7 +948,7 @@ define <2 x i64> @udivrem64(i64 %a, i64 %b) {
|
|||
; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX9-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_execz BB9_2
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB9_2
|
||||
; GFX9-NEXT: ; %bb.1:
|
||||
; GFX9-NEXT: v_cvt_f32_u32_e32 v4, v2
|
||||
; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v3
|
||||
|
@ -1061,10 +1061,10 @@ define <2 x i64> @udivrem64(i64 %a, i64 %b) {
|
|||
; GFX9-NEXT: v_cndmask_b32_e32 v6, v0, v1, vcc
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX9-NEXT: BB9_2: ; %Flow
|
||||
; GFX9-NEXT: .LBB9_2: ; %Flow
|
||||
; GFX9-NEXT: s_or_saveexec_b64 s[4:5], s[8:9]
|
||||
; GFX9-NEXT: s_xor_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_execz BB9_4
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB9_4
|
||||
; GFX9-NEXT: ; %bb.3:
|
||||
; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2
|
||||
; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2
|
||||
|
@ -1089,7 +1089,7 @@ define <2 x i64> @udivrem64(i64 %a, i64 %b) {
|
|||
; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
|
||||
; GFX9-NEXT: v_cndmask_b32_e32 v6, v0, v3, vcc
|
||||
; GFX9-NEXT: v_cndmask_b32_e32 v4, v1, v4, vcc
|
||||
; GFX9-NEXT: BB9_4:
|
||||
; GFX9-NEXT: .LBB9_4:
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v5
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
; GCN-NOT: s_sub_u32
|
||||
|
||||
; GCN: s_and_saveexec_b64
|
||||
; GCN: s_cbranch_execz [[BB1:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_execz [[BB1:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s32 glc{{$}}
|
||||
; GCN-NOT: s32
|
||||
|
|
|
@ -8,7 +8,7 @@ define hidden void @func() #1 {
|
|||
|
||||
; GCN-LABEL: {{^}}if_call:
|
||||
; GCN: s_and_saveexec_b64
|
||||
; GCN-NEXT: s_cbranch_execz [[END:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[END:.LBB[0-9]+_[0-9]+]]
|
||||
; GCN: s_swappc_b64
|
||||
; GCN: [[END]]:
|
||||
define void @if_call(i32 %flag) #0 {
|
||||
|
@ -25,7 +25,7 @@ end:
|
|||
|
||||
; GCN-LABEL: {{^}}if_asm:
|
||||
; GCN: s_and_saveexec_b64
|
||||
; GCN-NEXT: s_cbranch_execz [[END:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[END:.LBB[0-9]+_[0-9]+]]
|
||||
; GCN: ; sample asm
|
||||
; GCN: [[END]]:
|
||||
define void @if_asm(i32 %flag) #0 {
|
||||
|
@ -42,7 +42,7 @@ end:
|
|||
|
||||
; GCN-LABEL: {{^}}if_call_kernel:
|
||||
; GCN: s_and_saveexec_b64
|
||||
; GCN-NEXT: s_cbranch_execz BB3_2
|
||||
; GCN-NEXT: s_cbranch_execz .LBB3_2
|
||||
; GCN: s_swappc_b64
|
||||
define amdgpu_kernel void @if_call_kernel() #0 {
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
; GCN-LABEL: {{^}}test_loop:
|
||||
; GCN: s_and_b64 s[0:1], exec, -1
|
||||
; GCN: [[LABEL:BB[0-9]+_[0-9]+]]: ; %for.body{{$}}
|
||||
; GCN: [[LABEL:.LBB[0-9]+_[0-9]+]]: ; %for.body{{$}}
|
||||
; GCN: ds_read_b32
|
||||
; GCN: ds_write_b32
|
||||
; GCN: s_cbranch_vccnz [[LABEL]]
|
||||
|
@ -28,7 +28,7 @@ for.body:
|
|||
}
|
||||
|
||||
; GCN-LABEL: @loop_const_true
|
||||
; GCN: [[LABEL:BB[0-9]+_[0-9]+]]:
|
||||
; GCN: [[LABEL:.LBB[0-9]+_[0-9]+]]:
|
||||
; GCN: ds_read_b32
|
||||
; GCN: ds_write_b32
|
||||
; GCN: s_branch [[LABEL]]
|
||||
|
@ -99,7 +99,7 @@ for.body:
|
|||
; GCN: v_cmp_eq_u32{{[^,]*}}, 1,
|
||||
; GCN: s_add_i32 s2, s0, 0x80
|
||||
|
||||
; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]]
|
||||
; GCN: [[LOOPBB:.LBB[0-9]+_[0-9]+]]
|
||||
; GCN: _add_i32_e32 v0, vcc, 4, v0
|
||||
|
||||
; GCN: s_cbranch_{{vccz|vccnz}} [[LOOPBB]]
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
|
||||
; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32:
|
||||
; GCN: flat_load_dword
|
||||
; GCN: {{^}}BB0_2:
|
||||
; GCN: {{^}}.LBB0_2:
|
||||
define amdgpu_kernel void @test_no_sink_flat_small_offset_i32(i32* %out, i32* %in, i32 %cond) {
|
||||
entry:
|
||||
%out.gep = getelementptr i32, i32* %out, i64 999999
|
||||
|
|
|
@ -30,13 +30,13 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_csub_i32(i32 add
|
|||
; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: s_and_saveexec_b32 s4, vcc_lo
|
||||
; GCN-NEXT: s_cbranch_execz BB0_2
|
||||
; GCN-NEXT: s_cbranch_execz .LBB0_2
|
||||
; GCN-NEXT: ; %bb.1: ; %if
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 2
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: global_atomic_csub v0, v0, v1, s[2:3] offset:28 glc
|
||||
; GCN-NEXT: BB0_2: ; %endif
|
||||
; GCN-NEXT: .LBB0_2: ; %endif
|
||||
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s4
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0x3d0800
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
|
|
|
@ -30,7 +30,7 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_fadd_f32(float a
|
|||
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GCN-NEXT: s_cbranch_execz BB0_2
|
||||
; GCN-NEXT: s_cbranch_execz .LBB0_2
|
||||
; GCN-NEXT: ; %bb.1: ; %if
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 2.0
|
||||
|
@ -38,7 +38,7 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_fadd_f32(float a
|
|||
; GCN-NEXT: global_atomic_add_f32 v0, v1, s[2:3] offset:28
|
||||
; GCN-NEXT: global_load_dword v0, v[0:1], off glc
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: BB0_2: ; %endif
|
||||
; GCN-NEXT: .LBB0_2: ; %endif
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0x3d0000
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
|
|
|
@ -47,7 +47,7 @@ done:
|
|||
|
||||
; GFX9: v_mov_b32_e32 [[VOFFSET:v[0-9]+]], 0xf000{{$}}
|
||||
; GFX9: global_load_sbyte {{v[0-9]+}}, [[VOFFSET]], {{s\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
|
||||
; GCN: {{^}}BB1_2:
|
||||
; GCN: {{^}}.LBB1_2:
|
||||
; GCN: s_or_b64 exec
|
||||
define amdgpu_kernel void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
|
||||
entry:
|
||||
|
@ -76,7 +76,7 @@ done:
|
|||
; SICIVI: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
|
||||
; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
|
||||
; GFX9: global_load_sbyte {{v[0-9]+}}, [[ZERO]], {{s\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
|
||||
; GCN: {{^}}BB2_2:
|
||||
; GCN: {{^}}.LBB2_2:
|
||||
; GCN: s_or_b64 exec
|
||||
define amdgpu_kernel void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
|
||||
entry:
|
||||
|
@ -105,7 +105,7 @@ done:
|
|||
; SICIVI: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
|
||||
; GFX9: v_mov_b32_e32 [[VOFFSET:v[0-9]+]], 0x1000{{$}}
|
||||
; GFX9: global_load_sbyte {{v[0-9]+}}, [[VOFFSET]], {{s\[[0-9]+:[0-9]+\]$}}
|
||||
; GCN: {{^}}BB3_2:
|
||||
; GCN: {{^}}.LBB3_2:
|
||||
; GCN: s_or_b64 exec
|
||||
define amdgpu_kernel void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
|
||||
entry:
|
||||
|
@ -138,7 +138,7 @@ done:
|
|||
; GCN: s_and_saveexec_b64
|
||||
; GCN: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4092{{$}}
|
||||
; GCN: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4092 glc{{$}}
|
||||
; GCN: {{^}}BB4_2:
|
||||
; GCN: {{^}}.LBB4_2:
|
||||
define amdgpu_kernel void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
|
||||
entry:
|
||||
%alloca = alloca [512 x i32], align 4, addrspace(5)
|
||||
|
@ -178,7 +178,7 @@ done:
|
|||
; GCN: buffer_store_dword {{v[0-9]+}}, [[BASE_FI0]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen offset:4092{{$}}
|
||||
; GCN: v_mov_b32_e32 [[BASE_FI1:v[0-9]+]], 4
|
||||
; GCN: buffer_load_dword {{v[0-9]+}}, [[BASE_FI1]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen offset:4092 glc{{$}}
|
||||
; GCN: {{^BB[0-9]+}}_2:
|
||||
; GCN: {{^.LBB[0-9]+}}_2:
|
||||
|
||||
define amdgpu_kernel void @test_sink_scratch_small_offset_i32_reserved(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
|
||||
entry:
|
||||
|
@ -216,7 +216,7 @@ done:
|
|||
; GCN: s_and_saveexec_b64
|
||||
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
|
||||
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen glc{{$}}
|
||||
; GCN: {{^BB[0-9]+}}_2:
|
||||
; GCN: {{^.LBB[0-9]+}}_2:
|
||||
define amdgpu_kernel void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
|
||||
entry:
|
||||
%alloca = alloca [512 x i32], align 4, addrspace(5)
|
||||
|
@ -248,7 +248,7 @@ done:
|
|||
; GCN: s_and_saveexec_b64
|
||||
; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
|
||||
; GCN: {{^BB[0-9]+}}_2:
|
||||
; GCN: {{^.LBB[0-9]+}}_2:
|
||||
define amdgpu_kernel void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
|
||||
entry:
|
||||
%offset.ext = zext i32 %offset to i64
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
; GCN: s_cbranch_scc{{[0-1]}}
|
||||
|
||||
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70008
|
||||
; GCN: BB0_3:
|
||||
; GCN: .LBB0_3:
|
||||
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008
|
||||
|
||||
; GCN: buffer_store_dword
|
||||
|
@ -115,7 +115,7 @@ ret:
|
|||
; OPT: store
|
||||
; OPT: ret
|
||||
|
||||
; For GFX8: since i16 is legal type, we cannot sink lshr into BBs.
|
||||
; For GFX8: since i16 is legal type, we cannot sink lshr into .LBBs.
|
||||
|
||||
; GCN-LABEL: {{^}}sink_ubfe_i16:
|
||||
; GCN-NOT: lshr
|
||||
|
@ -126,7 +126,7 @@ ret:
|
|||
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004
|
||||
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0x7f
|
||||
|
||||
; GCN: BB2_3:
|
||||
; GCN: .LBB2_3:
|
||||
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
|
||||
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0xff
|
||||
|
||||
|
@ -175,11 +175,11 @@ ret:
|
|||
|
||||
; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint:
|
||||
|
||||
; GCN: s_cbranch_scc{{[0-1]}} BB3_2
|
||||
; GCN: s_cbranch_scc{{[0-1]}} .LBB3_2
|
||||
; GCN: v_alignbit_b32 v[[LO:[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, 30
|
||||
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7f, v[[LO]]
|
||||
|
||||
; GCN: BB3_3:
|
||||
; GCN: .LBB3_3:
|
||||
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]]
|
||||
|
||||
; GCN: buffer_store_dwordx2
|
||||
|
@ -223,11 +223,11 @@ ret:
|
|||
|
||||
; GCN-LABEL: {{^}}sink_ubfe_i64_low32:
|
||||
|
||||
; GCN: s_cbranch_scc{{[0-1]}} BB4_2
|
||||
; GCN: s_cbranch_scc{{[0-1]}} .LBB4_2
|
||||
|
||||
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7000f
|
||||
|
||||
; GCN: BB4_3:
|
||||
; GCN: .LBB4_3:
|
||||
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f
|
||||
|
||||
; GCN: buffer_store_dwordx2
|
||||
|
@ -270,10 +270,10 @@ ret:
|
|||
; OPT: ret
|
||||
|
||||
; GCN-LABEL: {{^}}sink_ubfe_i64_high32:
|
||||
; GCN: s_cbranch_scc{{[0-1]}} BB5_2
|
||||
; GCN: s_cbranch_scc{{[0-1]}} .LBB5_2
|
||||
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70003
|
||||
|
||||
; GCN: BB5_3:
|
||||
; GCN: .LBB5_3:
|
||||
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003
|
||||
|
||||
; GCN: buffer_store_dwordx2
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
; GCN-LABEL: {{^}}simple_nested_if:
|
||||
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF:BB[0-9_]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9_]+]]
|
||||
; GCN: s_and_b64 exec, exec, vcc
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF]]
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}:
|
||||
|
@ -37,9 +37,9 @@ bb.outer.end: ; preds = %bb.outer.then, %bb.
|
|||
|
||||
; GCN-LABEL: {{^}}uncollapsable_nested_if:
|
||||
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
|
||||
; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF_INNER:BB[0-9_]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]]
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}:
|
||||
; GCN: store_dword
|
||||
; GCN-NEXT: {{^}}[[ENDIF_INNER]]:
|
||||
|
@ -80,10 +80,10 @@ bb.outer.end: ; preds = %bb.inner.then, %bb
|
|||
|
||||
; GCN-LABEL: {{^}}nested_if_if_else:
|
||||
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
|
||||
; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
|
||||
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_INNER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_INNER]]
|
||||
; GCN-NEXT: s_cbranch_execz [[THEN_INNER:BB[0-9_]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[THEN_INNER:.LBB[0-9_]+]]
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}:
|
||||
; GCN: store_dword
|
||||
; GCN: {{^}}[[THEN_INNER]]:
|
||||
|
@ -127,11 +127,11 @@ bb.outer.end: ; preds = %bb, %bb.then, %b
|
|||
; GCN-LABEL: {{^}}nested_if_else_if:
|
||||
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
|
||||
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_OUTER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_OUTER]]
|
||||
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER:BB[0-9_]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER:.LBB[0-9_]+]]
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}:
|
||||
; GCN: store_dword
|
||||
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_ELSE:s\[[0-9:]+\]]]
|
||||
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER_FLOW:BB[0-9_]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER_FLOW:.LBB[0-9_]+]]
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}:
|
||||
; GCN: store_dword
|
||||
; GCN-NEXT: {{^}}[[THEN_OUTER_FLOW]]:
|
||||
|
@ -139,11 +139,11 @@ bb.outer.end: ; preds = %bb, %bb.then, %b
|
|||
; GCN: {{^}}[[THEN_OUTER]]:
|
||||
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_OUTER3:s\[[0-9:]+\]]], [[SAVEEXEC_OUTER2]]
|
||||
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_OUTER3]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}:
|
||||
; GCN: store_dword
|
||||
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_ELSE:s\[[0-9:]+\]]],
|
||||
; GCN-NEXT: s_cbranch_execz [[FLOW1:BB[0-9_]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[FLOW1:.LBB[0-9_]+]]
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}:
|
||||
; GCN: store_dword
|
||||
; GCN-NEXT: [[FLOW1]]:
|
||||
|
@ -188,7 +188,7 @@ bb.outer.end:
|
|||
|
||||
; GCN-LABEL: {{^}}s_endpgm_unsafe_barrier:
|
||||
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF:BB[0-9_]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9_]+]]
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}:
|
||||
; GCN: store_dword
|
||||
; GCN-NEXT: {{^}}[[ENDIF]]:
|
||||
|
@ -213,15 +213,15 @@ bb.end: ; preds = %bb.then, %bb
|
|||
|
||||
; GCN-LABEL: {{^}}scc_liveness:
|
||||
|
||||
; GCN: [[BB1_OUTER_LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; GCN: [[BB1_OUTER_LOOP:.LBB[0-9]+_[0-9]+]]:
|
||||
; GCN: s_or_b64 exec, exec, [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
|
||||
;
|
||||
; GCN: [[BB1_INNER_LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; GCN: [[BB1_INNER_LOOP:.LBB[0-9]+_[0-9]+]]:
|
||||
; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
|
||||
; GCN: s_andn2_b64
|
||||
; GCN-NEXT: s_cbranch_execz
|
||||
|
||||
; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; GCN: [[BB1_LOOP:.LBB[0-9]+_[0-9]+]]:
|
||||
; GCN: s_andn2_b64 exec, exec,
|
||||
; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]]
|
||||
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
|
||||
|
||||
; GCN: s_cbranch_execz [[ENDIF:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_execz [[ENDIF:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: ; %bb.{{[0-9]+}}: ; %if
|
||||
; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
|
||||
|
@ -105,10 +105,10 @@ endif:
|
|||
|
||||
; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]]
|
||||
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
|
||||
; GCN-NEXT: s_cbranch_execz [[END:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[END:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
|
||||
; GCN: [[LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; GCN: [[LOOP:.LBB[0-9]+_[0-9]+]]:
|
||||
; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
|
||||
; GCN: v_subrev_i32_e32 [[VAL_LOOP:v[0-9]+]], vcc, v{{[0-9]+}}, v[[VAL_LOOP_RELOAD]]
|
||||
; GCN: s_cmp_lg_u32
|
||||
|
@ -183,8 +183,8 @@ end:
|
|||
; GCN: s_mov_b64 exec, [[CMP0]]
|
||||
|
||||
; FIXME: It makes no sense to put this skip here
|
||||
; GCN: s_cbranch_execz [[FLOW:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: s_branch [[ELSE:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_execz [[FLOW:.LBB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: s_branch [[ELSE:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: [[FLOW]]: ; %Flow
|
||||
; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
|
||||
|
@ -213,7 +213,7 @@ end:
|
|||
; VMEM: buffer_store_dword v[[FLOW_V_SAVEEXEC]], off, s[0:3], 0 offset:[[FLOW_SAVEEXEC_OFFSET:[0-9]+]] ; 4-byte Folded Spill
|
||||
|
||||
; GCN: s_xor_b64 exec, exec, s{{\[}}[[FLOW_AND_EXEC_LO]]:[[FLOW_AND_EXEC_HI]]{{\]}}
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
|
||||
; GCN: ; %bb.{{[0-9]+}}: ; %if
|
||||
|
@ -221,7 +221,7 @@ end:
|
|||
; GCN: ds_read_b32
|
||||
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]]
|
||||
; GCN: buffer_store_dword [[ADD]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Spill
|
||||
; GCN-NEXT: s_branch [[ENDIF:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: s_branch [[ENDIF:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: [[ELSE]]: ; %else
|
||||
; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
|
||||
|
|
|
@ -178,19 +178,19 @@ define amdgpu_kernel void @v3i16_registers(i1 %cond) #0 {
|
|||
; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0
|
||||
; GCN-NEXT: s_and_b64 vcc, exec, s[4:5]
|
||||
; GCN-NEXT: s_mov_b32 s32, 0
|
||||
; GCN-NEXT: s_cbranch_vccnz BB4_2
|
||||
; GCN-NEXT: s_cbranch_vccnz .LBB4_2
|
||||
; GCN-NEXT: ; %bb.1: ; %if.else
|
||||
; GCN-NEXT: s_getpc_b64 s[4:5]
|
||||
; GCN-NEXT: s_add_u32 s4, s4, func_v3i16@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s5, s5, func_v3i16@rel32@hi+12
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GCN-NEXT: s_branch BB4_3
|
||||
; GCN-NEXT: BB4_2:
|
||||
; GCN-NEXT: s_branch .LBB4_3
|
||||
; GCN-NEXT: .LBB4_2:
|
||||
; GCN-NEXT: s_mov_b32 s4, 0
|
||||
; GCN-NEXT: s_mov_b32 s5, s4
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GCN-NEXT: BB4_3: ; %if.end
|
||||
; GCN-NEXT: .LBB4_3: ; %if.end
|
||||
; GCN-NEXT: global_store_short v[0:1], v1, off
|
||||
; GCN-NEXT: global_store_dword v[0:1], v0, off
|
||||
; GCN-NEXT: s_endpgm
|
||||
|
@ -223,19 +223,19 @@ define amdgpu_kernel void @v3f16_registers(i1 %cond) #0 {
|
|||
; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0
|
||||
; GCN-NEXT: s_and_b64 vcc, exec, s[4:5]
|
||||
; GCN-NEXT: s_mov_b32 s32, 0
|
||||
; GCN-NEXT: s_cbranch_vccnz BB5_2
|
||||
; GCN-NEXT: s_cbranch_vccnz .LBB5_2
|
||||
; GCN-NEXT: ; %bb.1: ; %if.else
|
||||
; GCN-NEXT: s_getpc_b64 s[4:5]
|
||||
; GCN-NEXT: s_add_u32 s4, s4, func_v3f16@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s5, s5, func_v3f16@rel32@hi+12
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GCN-NEXT: s_branch BB5_3
|
||||
; GCN-NEXT: BB5_2:
|
||||
; GCN-NEXT: s_branch .LBB5_3
|
||||
; GCN-NEXT: .LBB5_2:
|
||||
; GCN-NEXT: s_mov_b32 s4, 0
|
||||
; GCN-NEXT: s_mov_b32 s5, s4
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GCN-NEXT: BB5_3: ; %if.end
|
||||
; GCN-NEXT: .LBB5_3: ; %if.end
|
||||
; GCN-NEXT: global_store_short v[0:1], v1, off
|
||||
; GCN-NEXT: global_store_dword v[0:1], v0, off
|
||||
; GCN-NEXT: s_endpgm
|
||||
|
|
|
@ -1490,7 +1490,7 @@ define amdgpu_kernel void @ctpop_i16_in_br(i16 addrspace(1)* %out, i16 addrspace
|
|||
; SI-NEXT: s_lshr_b32 s2, s4, 16
|
||||
; SI-NEXT: s_cmp_lg_u32 s2, 0
|
||||
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
||||
; SI-NEXT: s_cbranch_scc0 BB14_2
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB14_2
|
||||
; SI-NEXT: ; %bb.1: ; %else
|
||||
; SI-NEXT: s_mov_b32 s11, 0xf000
|
||||
; SI-NEXT: s_mov_b32 s10, -1
|
||||
|
@ -1499,18 +1499,18 @@ define amdgpu_kernel void @ctpop_i16_in_br(i16 addrspace(1)* %out, i16 addrspace
|
|||
; SI-NEXT: s_mov_b32 s9, s3
|
||||
; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 offset:2
|
||||
; SI-NEXT: s_mov_b64 s[2:3], 0
|
||||
; SI-NEXT: s_cbranch_execz BB14_3
|
||||
; SI-NEXT: s_branch BB14_4
|
||||
; SI-NEXT: BB14_2:
|
||||
; SI-NEXT: s_cbranch_execz .LBB14_3
|
||||
; SI-NEXT: s_branch .LBB14_4
|
||||
; SI-NEXT: .LBB14_2:
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_mov_b64 s[2:3], -1
|
||||
; SI-NEXT: v_mov_b32_e32 v0, 0
|
||||
; SI-NEXT: BB14_3: ; %if
|
||||
; SI-NEXT: .LBB14_3: ; %if
|
||||
; SI-NEXT: s_and_b32 s2, s4, 0xffff
|
||||
; SI-NEXT: s_bcnt1_i32_b32 s2, s2
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: v_mov_b32_e32 v0, s2
|
||||
; SI-NEXT: BB14_4: ; %endif
|
||||
; SI-NEXT: .LBB14_4: ; %endif
|
||||
; SI-NEXT: s_mov_b32 s3, 0xf000
|
||||
; SI-NEXT: s_mov_b32 s2, -1
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -1525,23 +1525,23 @@ define amdgpu_kernel void @ctpop_i16_in_br(i16 addrspace(1)* %out, i16 addrspace
|
|||
; VI-NEXT: s_lshr_b32 s0, s2, 16
|
||||
; VI-NEXT: v_cmp_ne_u16_e64 s[0:1], s0, 0
|
||||
; VI-NEXT: s_and_b64 vcc, exec, s[0:1]
|
||||
; VI-NEXT: s_cbranch_vccz BB14_2
|
||||
; VI-NEXT: s_cbranch_vccz .LBB14_2
|
||||
; VI-NEXT: ; %bb.1: ; %else
|
||||
; VI-NEXT: s_mov_b32 s11, 0xf000
|
||||
; VI-NEXT: s_mov_b32 s10, -1
|
||||
; VI-NEXT: s_mov_b32 s8, s6
|
||||
; VI-NEXT: s_mov_b32 s9, s7
|
||||
; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 offset:2
|
||||
; VI-NEXT: s_cbranch_execz BB14_3
|
||||
; VI-NEXT: s_branch BB14_4
|
||||
; VI-NEXT: BB14_2:
|
||||
; VI-NEXT: s_cbranch_execz .LBB14_3
|
||||
; VI-NEXT: s_branch .LBB14_4
|
||||
; VI-NEXT: .LBB14_2:
|
||||
; VI-NEXT: ; implicit-def: $vgpr0
|
||||
; VI-NEXT: BB14_3: ; %if
|
||||
; VI-NEXT: .LBB14_3: ; %if
|
||||
; VI-NEXT: s_and_b32 s0, s2, 0xffff
|
||||
; VI-NEXT: s_bcnt1_i32_b32 s0, s0
|
||||
; VI-NEXT: s_waitcnt vmcnt(0)
|
||||
; VI-NEXT: v_mov_b32_e32 v0, s0
|
||||
; VI-NEXT: BB14_4: ; %endif
|
||||
; VI-NEXT: .LBB14_4: ; %endif
|
||||
; VI-NEXT: s_mov_b32 s7, 0xf000
|
||||
; VI-NEXT: s_mov_b32 s6, -1
|
||||
; VI-NEXT: s_waitcnt vmcnt(0)
|
||||
|
|
|
@ -125,7 +125,7 @@ define protected amdgpu_kernel void @nand(i32 addrspace(1)* %p, %S addrspace(1)*
|
|||
; CHECK-NEXT: s_load_dword s6, s[0:1], 0x0
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, s6
|
||||
; CHECK-NEXT: BB5_1: ; %atomicrmw.start
|
||||
; CHECK-NEXT: .LBB5_1: ; %atomicrmw.start
|
||||
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, v1
|
||||
; CHECK-NEXT: v_not_b32_e32 v1, v3
|
||||
|
@ -135,7 +135,7 @@ define protected amdgpu_kernel void @nand(i32 addrspace(1)* %p, %S addrspace(1)*
|
|||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
|
||||
; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execnz BB5_1
|
||||
; CHECK-NEXT: s_cbranch_execnz .LBB5_1
|
||||
; CHECK-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v1, 12, s[2:3]
|
||||
|
@ -335,7 +335,7 @@ define protected amdgpu_kernel void @fadd(float addrspace(1)* %p, %S addrspace(1
|
|||
; CHECK-NEXT: s_load_dword s6, s[0:1], 0x0
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, s6
|
||||
; CHECK-NEXT: BB14_1: ; %atomicrmw.start
|
||||
; CHECK-NEXT: .LBB14_1: ; %atomicrmw.start
|
||||
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, v1
|
||||
; CHECK-NEXT: v_add_f32_e32 v2, 1.0, v3
|
||||
|
@ -344,7 +344,7 @@ define protected amdgpu_kernel void @fadd(float addrspace(1)* %p, %S addrspace(1
|
|||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
|
||||
; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execnz BB14_1
|
||||
; CHECK-NEXT: s_cbranch_execnz .LBB14_1
|
||||
; CHECK-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v1
|
||||
|
@ -370,7 +370,7 @@ define protected amdgpu_kernel void @fsub(float addrspace(1)* %p, %S addrspace(1
|
|||
; CHECK-NEXT: s_load_dword s6, s[0:1], 0x0
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, s6
|
||||
; CHECK-NEXT: BB15_1: ; %atomicrmw.start
|
||||
; CHECK-NEXT: .LBB15_1: ; %atomicrmw.start
|
||||
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, v1
|
||||
; CHECK-NEXT: v_add_f32_e32 v2, -1.0, v3
|
||||
|
@ -379,7 +379,7 @@ define protected amdgpu_kernel void @fsub(float addrspace(1)* %p, %S addrspace(1
|
|||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
|
||||
; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execnz BB15_1
|
||||
; CHECK-NEXT: s_cbranch_execnz .LBB15_1
|
||||
; CHECK-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v1
|
||||
|
|
|
@ -26,13 +26,13 @@ define amdgpu_ps void @main(i32 %0, float %1) {
|
|||
; ISA-NEXT: s_mov_b64 s[0:1], 0
|
||||
; ISA-NEXT: ; implicit-def: $sgpr4_sgpr5
|
||||
; ISA-NEXT: ; implicit-def: $sgpr2_sgpr3
|
||||
; ISA-NEXT: s_branch BB0_3
|
||||
; ISA-NEXT: BB0_1: ; %Flow1
|
||||
; ISA-NEXT: s_branch .LBB0_3
|
||||
; ISA-NEXT: .LBB0_1: ; %Flow1
|
||||
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
|
||||
; ISA-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; ISA-NEXT: s_add_i32 s8, s8, 1
|
||||
; ISA-NEXT: s_mov_b64 s[6:7], 0
|
||||
; ISA-NEXT: BB0_2: ; %Flow
|
||||
; ISA-NEXT: .LBB0_2: ; %Flow
|
||||
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
|
||||
; ISA-NEXT: s_and_b64 s[10:11], exec, s[4:5]
|
||||
; ISA-NEXT: s_or_b64 s[0:1], s[10:11], s[0:1]
|
||||
|
@ -40,23 +40,23 @@ define amdgpu_ps void @main(i32 %0, float %1) {
|
|||
; ISA-NEXT: s_and_b64 s[6:7], s[6:7], exec
|
||||
; ISA-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7]
|
||||
; ISA-NEXT: s_andn2_b64 exec, exec, s[0:1]
|
||||
; ISA-NEXT: s_cbranch_execz BB0_6
|
||||
; ISA-NEXT: BB0_3: ; %loop
|
||||
; ISA-NEXT: s_cbranch_execz .LBB0_6
|
||||
; ISA-NEXT: .LBB0_3: ; %loop
|
||||
; ISA-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; ISA-NEXT: s_or_b64 s[4:5], s[4:5], exec
|
||||
; ISA-NEXT: s_cmp_lt_u32 s8, 32
|
||||
; ISA-NEXT: s_mov_b64 s[6:7], -1
|
||||
; ISA-NEXT: s_cbranch_scc0 BB0_2
|
||||
; ISA-NEXT: s_cbranch_scc0 .LBB0_2
|
||||
; ISA-NEXT: ; %bb.4: ; %endif1
|
||||
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
|
||||
; ISA-NEXT: s_mov_b64 s[4:5], -1
|
||||
; ISA-NEXT: s_and_saveexec_b64 s[6:7], vcc
|
||||
; ISA-NEXT: s_cbranch_execz BB0_1
|
||||
; ISA-NEXT: s_cbranch_execz .LBB0_1
|
||||
; ISA-NEXT: ; %bb.5: ; %endif2
|
||||
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
|
||||
; ISA-NEXT: s_xor_b64 s[4:5], exec, -1
|
||||
; ISA-NEXT: s_branch BB0_1
|
||||
; ISA-NEXT: BB0_6: ; %Flow2
|
||||
; ISA-NEXT: s_branch .LBB0_1
|
||||
; ISA-NEXT: .LBB0_6: ; %Flow2
|
||||
; ISA-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; ISA-NEXT: v_mov_b32_e32 v1, 0
|
||||
; ISA-NEXT: s_and_saveexec_b64 s[0:1], s[2:3]
|
||||
|
|
|
@ -112,7 +112,7 @@ endif:
|
|||
|
||||
; Short chain of cheap instructions to not convert
|
||||
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_min_expensive:
|
||||
; GCN: s_cbranch_vccnz [[ENDIF:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_vccnz [[ENDIF:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: v_mul_f32
|
||||
; GCN: v_mul_f32
|
||||
|
@ -155,7 +155,7 @@ endif:
|
|||
; Should still branch over fdiv expansion
|
||||
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_expensive:
|
||||
; GCN: v_cmp_neq_f32_e32
|
||||
; GCN: s_cbranch_vccnz [[ENDIF:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_vccnz [[ENDIF:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: v_div_scale_f32
|
||||
|
||||
|
@ -180,7 +180,7 @@ endif:
|
|||
; vcc branch with SGPR inputs
|
||||
; GCN-LABEL: {{^}}test_vccnz_sgpr_ifcvt_triangle:
|
||||
; GCN: v_cmp_neq_f32_e64
|
||||
; GCN: s_cbranch_vccnz [[ENDIF:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_vccnz [[ENDIF:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: s_add_i32
|
||||
|
||||
|
@ -267,7 +267,7 @@ endif:
|
|||
; Scalar branch but VGPR select operands
|
||||
; GCN-LABEL: {{^}}test_scc1_vgpr_ifcvt_triangle:
|
||||
; GCN: s_cmp_lg_u32
|
||||
; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_scc1 [[ENDIF:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: v_add_f32_e32
|
||||
|
||||
|
@ -402,7 +402,7 @@ done:
|
|||
|
||||
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle256:
|
||||
; GCN: v_cmp_neq_f32
|
||||
; GCN: s_cbranch_vccnz [[ENDIF:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_vccnz [[ENDIF:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: v_add_i32
|
||||
; GCN: v_add_i32
|
||||
|
@ -427,7 +427,7 @@ endif:
|
|||
|
||||
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle512:
|
||||
; GCN: v_cmp_neq_f32
|
||||
; GCN: s_cbranch_vccnz [[ENDIF:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_vccnz [[ENDIF:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: v_add_i32
|
||||
; GCN: v_add_i32
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
; This is was lowered from the llvm.SI.end.cf intrinsic:
|
||||
; CHECK: s_or_b64 exec, exec
|
||||
|
||||
; CHECK: [[LOOP_LABEL:[0-9A-Za-z_]+]]: ; %loop{{$}}
|
||||
; CHECK: [[LOOP_LABEL:.L[0-9A-Za-z_]+]]: ; %loop{{$}}
|
||||
; CHECK-NOT: s_or_b64 exec, exec
|
||||
; CHECK: s_cbranch_execnz [[LOOP_LABEL]]
|
||||
define amdgpu_kernel void @test(i32 addrspace(1)* %out) {
|
||||
|
|
|
@ -83,14 +83,14 @@ define amdgpu_kernel void @s_add_co_br_user(i32 %i) {
|
|||
; GFX7-NEXT: s_addc_u32 s0, s2, 0
|
||||
; GFX7-NEXT: v_cmp_ge_u32_e32 vcc, s0, v0
|
||||
; GFX7-NEXT: s_and_b64 vcc, exec, vcc
|
||||
; GFX7-NEXT: s_cbranch_vccnz BB1_2
|
||||
; GFX7-NEXT: s_cbranch_vccnz .LBB1_2
|
||||
; GFX7-NEXT: ; %bb.1: ; %bb0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, 9
|
||||
; GFX7-NEXT: flat_store_dword v[0:1], v2
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: BB1_2: ; %bb1
|
||||
; GFX7-NEXT: .LBB1_2: ; %bb1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, 10
|
||||
|
@ -110,14 +110,14 @@ define amdgpu_kernel void @s_add_co_br_user(i32 %i) {
|
|||
; GFX9-NEXT: s_addc_u32 s0, s2, 0
|
||||
; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, s0, v0
|
||||
; GFX9-NEXT: s_and_b64 vcc, exec, vcc
|
||||
; GFX9-NEXT: s_cbranch_vccnz BB1_2
|
||||
; GFX9-NEXT: s_cbranch_vccnz .LBB1_2
|
||||
; GFX9-NEXT: ; %bb.1: ; %bb0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, 9
|
||||
; GFX9-NEXT: global_store_dword v[0:1], v2, off
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: BB1_2: ; %bb1
|
||||
; GFX9-NEXT: .LBB1_2: ; %bb1
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, 10
|
||||
|
@ -137,14 +137,14 @@ define amdgpu_kernel void @s_add_co_br_user(i32 %i) {
|
|||
; GFX10-NEXT: s_addc_u32 s0, s0, 0
|
||||
; GFX10-NEXT: v_cmp_ge_u32_e32 vcc_lo, s0, v0
|
||||
; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo
|
||||
; GFX10-NEXT: s_cbranch_vccnz BB1_2
|
||||
; GFX10-NEXT: s_cbranch_vccnz .LBB1_2
|
||||
; GFX10-NEXT: ; %bb.1: ; %bb0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, 9
|
||||
; GFX10-NEXT: global_store_dword v[0:1], v2, off
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: BB1_2: ; %bb1
|
||||
; GFX10-NEXT: .LBB1_2: ; %bb1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, 10
|
||||
|
|
|
@ -421,7 +421,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(double addrspace(1)*
|
|||
; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1]
|
||||
; GFX90A-NEXT: BB24_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB24_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
|
@ -434,7 +434,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(double addrspace(1)*
|
|||
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX90A-NEXT: s_cbranch_execnz BB24_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB24_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
main_body:
|
||||
|
@ -469,7 +469,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(double addrsp
|
|||
; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1]
|
||||
; GFX90A-NEXT: BB26_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB26_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
|
@ -482,7 +482,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(double addrsp
|
|||
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX90A-NEXT: s_cbranch_execnz BB26_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB26_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
main_body:
|
||||
|
@ -525,7 +525,7 @@ define double @global_atomic_fadd_f64_rtn_pat(double addrspace(1)* %ptr, double
|
|||
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: global_load_dwordx2 v[2:3], v[0:1], off
|
||||
; GFX90A-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX90A-NEXT: BB29_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB29_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
|
||||
|
@ -539,7 +539,7 @@ define double @global_atomic_fadd_f64_rtn_pat(double addrspace(1)* %ptr, double
|
|||
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
|
||||
; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; GFX90A-NEXT: s_cbranch_execnz BB29_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB29_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
|
||||
|
@ -572,7 +572,7 @@ define double @global_atomic_fadd_f64_rtn_pat_system(double addrspace(1)* %ptr,
|
|||
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: global_load_dwordx2 v[2:3], v[0:1], off
|
||||
; GFX90A-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX90A-NEXT: BB31_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB31_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
|
||||
|
@ -586,7 +586,7 @@ define double @global_atomic_fadd_f64_rtn_pat_system(double addrspace(1)* %ptr,
|
|||
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
|
||||
; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; GFX90A-NEXT: s_cbranch_execnz BB31_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB31_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
|
||||
|
@ -631,7 +631,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(double ad
|
|||
; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1]
|
||||
; GFX90A-NEXT: BB34_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB34_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
|
@ -642,7 +642,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(double ad
|
|||
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX90A-NEXT: s_cbranch_execnz BB34_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB34_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
main_body:
|
||||
|
@ -658,7 +658,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(double* %ptr) #1 {
|
|||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
|
||||
; GFX90A-NEXT: BB35_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB35_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
|
||||
|
@ -673,7 +673,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(double* %ptr) #1 {
|
|||
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX90A-NEXT: s_cbranch_execnz BB35_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB35_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
main_body:
|
||||
|
@ -707,7 +707,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(double* %ptr) #
|
|||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
|
||||
; GFX90A-NEXT: BB37_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB37_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
|
||||
|
@ -723,7 +723,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(double* %ptr) #
|
|||
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX90A-NEXT: s_cbranch_execnz BB37_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB37_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
main_body:
|
||||
|
@ -737,7 +737,7 @@ define double @flat_atomic_fadd_f64_rtn_pat(double* %ptr) #1 {
|
|||
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
|
||||
; GFX90A-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX90A-NEXT: BB38_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB38_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
|
||||
|
@ -751,7 +751,7 @@ define double @flat_atomic_fadd_f64_rtn_pat(double* %ptr) #1 {
|
|||
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
|
||||
; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; GFX90A-NEXT: s_cbranch_execnz BB38_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB38_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
|
||||
|
@ -784,7 +784,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(double* %ptr) #1 {
|
|||
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
|
||||
; GFX90A-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX90A-NEXT: BB40_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB40_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
|
||||
|
@ -799,7 +799,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(double* %ptr) #1 {
|
|||
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
|
||||
; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; GFX90A-NEXT: s_cbranch_execnz BB40_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB40_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
|
||||
|
@ -846,7 +846,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(double* %pt
|
|||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
|
||||
; GFX90A-NEXT: BB43_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB43_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
|
||||
|
@ -859,7 +859,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(double* %pt
|
|||
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX90A-NEXT: s_cbranch_execnz BB43_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB43_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
main_body:
|
||||
|
@ -994,7 +994,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(double add
|
|||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX90A-NEXT: ds_read_b64 v[0:1], v0
|
||||
; GFX90A-NEXT: BB52_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB52_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_add_f64 v[2:3], v[0:1], 4.0
|
||||
|
@ -1006,7 +1006,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(double add
|
|||
; GFX90A-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[0,1]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[0:1]
|
||||
; GFX90A-NEXT: s_cbranch_execnz BB52_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB52_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
main_body:
|
||||
|
|
|
@ -14,7 +14,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32(float addrspace(1)* %ptr)
|
|||
; GFX900-NEXT: s_load_dword s4, s[0:1], 0x0
|
||||
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX900-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX900-NEXT: BB0_1: ; %atomicrmw.start
|
||||
; GFX900-NEXT: .LBB0_1: ; %atomicrmw.start
|
||||
; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX900-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GFX900-NEXT: v_add_f32_e32 v1, 4.0, v2
|
||||
|
@ -25,7 +25,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32(float addrspace(1)* %ptr)
|
|||
; GFX900-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
||||
; GFX900-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX900-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX900-NEXT: s_cbranch_execnz BB0_1
|
||||
; GFX900-NEXT: s_cbranch_execnz .LBB0_1
|
||||
; GFX900-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX900-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX900-NEXT: global_store_dword v[0:1], v1, off
|
||||
|
@ -40,7 +40,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32(float addrspace(1)* %ptr)
|
|||
; GFX908-NEXT: s_load_dword s4, s[0:1], 0x0
|
||||
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX908-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX908-NEXT: BB0_1: ; %atomicrmw.start
|
||||
; GFX908-NEXT: .LBB0_1: ; %atomicrmw.start
|
||||
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX908-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GFX908-NEXT: v_add_f32_e32 v1, 4.0, v2
|
||||
|
@ -51,7 +51,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32(float addrspace(1)* %ptr)
|
|||
; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
||||
; GFX908-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX908-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX908-NEXT: s_cbranch_execnz BB0_1
|
||||
; GFX908-NEXT: s_cbranch_execnz .LBB0_1
|
||||
; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX908-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX908-NEXT: global_store_dword v[0:1], v1, off
|
||||
|
@ -66,7 +66,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32(float addrspace(1)* %ptr)
|
|||
; GFX90A-NEXT: s_load_dword s4, s[0:1], 0x0
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX90A-NEXT: BB0_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB0_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
|
||||
; GFX90A-NEXT: v_add_f32_e32 v2, 4.0, v3
|
||||
|
@ -79,7 +79,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32(float addrspace(1)* %ptr)
|
|||
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
|
||||
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX90A-NEXT: s_cbranch_execnz BB0_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB0_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX90A-NEXT: global_store_dword v[0:1], v1, off
|
||||
|
@ -94,7 +94,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32(float addrspace(1)* %ptr)
|
|||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-NEXT: s_mov_b32 s2, 0
|
||||
; GFX10-NEXT: BB0_1: ; %atomicrmw.start
|
||||
; GFX10-NEXT: .LBB0_1: ; %atomicrmw.start
|
||||
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GFX10-NEXT: v_add_f32_e32 v1, 4.0, v2
|
||||
|
@ -107,7 +107,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32(float addrspace(1)* %ptr)
|
|||
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
||||
; GFX10-NEXT: s_or_b32 s2, vcc_lo, s2
|
||||
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-NEXT: s_cbranch_execnz BB0_1
|
||||
; GFX10-NEXT: s_cbranch_execnz .LBB0_1
|
||||
; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-NEXT: global_store_dword v[0:1], v1, off
|
||||
|
@ -127,7 +127,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_ieee(float addrspace(1)* %
|
|||
; GFX900-NEXT: s_load_dword s4, s[0:1], 0x0
|
||||
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX900-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX900-NEXT: BB1_1: ; %atomicrmw.start
|
||||
; GFX900-NEXT: .LBB1_1: ; %atomicrmw.start
|
||||
; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX900-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GFX900-NEXT: v_add_f32_e32 v1, 4.0, v2
|
||||
|
@ -138,7 +138,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_ieee(float addrspace(1)* %
|
|||
; GFX900-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
||||
; GFX900-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX900-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX900-NEXT: s_cbranch_execnz BB1_1
|
||||
; GFX900-NEXT: s_cbranch_execnz .LBB1_1
|
||||
; GFX900-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX900-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX900-NEXT: global_store_dword v[0:1], v1, off
|
||||
|
@ -153,7 +153,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_ieee(float addrspace(1)* %
|
|||
; GFX908-NEXT: s_load_dword s4, s[0:1], 0x0
|
||||
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX908-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX908-NEXT: BB1_1: ; %atomicrmw.start
|
||||
; GFX908-NEXT: .LBB1_1: ; %atomicrmw.start
|
||||
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX908-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GFX908-NEXT: v_add_f32_e32 v1, 4.0, v2
|
||||
|
@ -164,7 +164,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_ieee(float addrspace(1)* %
|
|||
; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
||||
; GFX908-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX908-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX908-NEXT: s_cbranch_execnz BB1_1
|
||||
; GFX908-NEXT: s_cbranch_execnz .LBB1_1
|
||||
; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX908-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX908-NEXT: global_store_dword v[0:1], v1, off
|
||||
|
@ -191,7 +191,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_ieee(float addrspace(1)* %
|
|||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-NEXT: s_mov_b32 s2, 0
|
||||
; GFX10-NEXT: BB1_1: ; %atomicrmw.start
|
||||
; GFX10-NEXT: .LBB1_1: ; %atomicrmw.start
|
||||
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GFX10-NEXT: v_add_f32_e32 v1, 4.0, v2
|
||||
|
@ -204,7 +204,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_ieee(float addrspace(1)* %
|
|||
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
||||
; GFX10-NEXT: s_or_b32 s2, vcc_lo, s2
|
||||
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-NEXT: s_cbranch_execnz BB1_1
|
||||
; GFX10-NEXT: s_cbranch_execnz .LBB1_1
|
||||
; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-NEXT: global_store_dword v[0:1], v1, off
|
||||
|
@ -224,7 +224,7 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32(float addrspace(1)* %ptr
|
|||
; GFX900-NEXT: s_load_dword s4, s[0:1], 0x0
|
||||
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX900-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX900-NEXT: BB2_1: ; %atomicrmw.start
|
||||
; GFX900-NEXT: .LBB2_1: ; %atomicrmw.start
|
||||
; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX900-NEXT: v_add_f32_e32 v0, 4.0, v1
|
||||
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
|
@ -235,7 +235,7 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32(float addrspace(1)* %ptr
|
|||
; GFX900-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX900-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX900-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX900-NEXT: s_cbranch_execnz BB2_1
|
||||
; GFX900-NEXT: s_cbranch_execnz .LBB2_1
|
||||
; GFX900-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX900-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -270,7 +270,7 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32(float addrspace(1)* %ptr
|
|||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-NEXT: s_mov_b32 s2, 0
|
||||
; GFX10-NEXT: BB2_1: ; %atomicrmw.start
|
||||
; GFX10-NEXT: .LBB2_1: ; %atomicrmw.start
|
||||
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: v_add_f32_e32 v0, 4.0, v1
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
|
@ -283,7 +283,7 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32(float addrspace(1)* %ptr
|
|||
; GFX10-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX10-NEXT: s_or_b32 s2, vcc_lo, s2
|
||||
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-NEXT: s_cbranch_execnz BB2_1
|
||||
; GFX10-NEXT: s_cbranch_execnz .LBB2_1
|
||||
; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX10-NEXT: s_endpgm
|
||||
%result = atomicrmw fadd float addrspace(1)* %ptr, float 4.0 syncscope("agent") seq_cst
|
||||
|
@ -300,7 +300,7 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_ieee(float addrspace(1)*
|
|||
; GFX900-NEXT: s_load_dword s4, s[0:1], 0x0
|
||||
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX900-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX900-NEXT: BB3_1: ; %atomicrmw.start
|
||||
; GFX900-NEXT: .LBB3_1: ; %atomicrmw.start
|
||||
; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX900-NEXT: v_add_f32_e32 v0, 4.0, v1
|
||||
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
|
@ -311,7 +311,7 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_ieee(float addrspace(1)*
|
|||
; GFX900-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX900-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX900-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX900-NEXT: s_cbranch_execnz BB3_1
|
||||
; GFX900-NEXT: s_cbranch_execnz .LBB3_1
|
||||
; GFX900-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX900-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -346,7 +346,7 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_ieee(float addrspace(1)*
|
|||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-NEXT: s_mov_b32 s2, 0
|
||||
; GFX10-NEXT: BB3_1: ; %atomicrmw.start
|
||||
; GFX10-NEXT: .LBB3_1: ; %atomicrmw.start
|
||||
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: v_add_f32_e32 v0, 4.0, v1
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
|
@ -359,7 +359,7 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_ieee(float addrspace(1)*
|
|||
; GFX10-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX10-NEXT: s_or_b32 s2, vcc_lo, s2
|
||||
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-NEXT: s_cbranch_execnz BB3_1
|
||||
; GFX10-NEXT: s_cbranch_execnz .LBB3_1
|
||||
; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX10-NEXT: s_endpgm
|
||||
%result = atomicrmw fadd float addrspace(1)* %ptr, float 4.0 syncscope("agent") seq_cst
|
||||
|
@ -376,7 +376,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_agent(float addrspace(1)*
|
|||
; GFX900-NEXT: s_load_dword s4, s[0:1], 0x0
|
||||
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX900-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX900-NEXT: BB4_1: ; %atomicrmw.start
|
||||
; GFX900-NEXT: .LBB4_1: ; %atomicrmw.start
|
||||
; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX900-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GFX900-NEXT: v_add_f32_e32 v1, 4.0, v2
|
||||
|
@ -387,7 +387,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_agent(float addrspace(1)*
|
|||
; GFX900-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
||||
; GFX900-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX900-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX900-NEXT: s_cbranch_execnz BB4_1
|
||||
; GFX900-NEXT: s_cbranch_execnz .LBB4_1
|
||||
; GFX900-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX900-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX900-NEXT: global_store_dword v[0:1], v1, off
|
||||
|
@ -402,7 +402,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_agent(float addrspace(1)*
|
|||
; GFX908-NEXT: s_load_dword s4, s[0:1], 0x0
|
||||
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX908-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX908-NEXT: BB4_1: ; %atomicrmw.start
|
||||
; GFX908-NEXT: .LBB4_1: ; %atomicrmw.start
|
||||
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX908-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GFX908-NEXT: v_add_f32_e32 v1, 4.0, v2
|
||||
|
@ -413,7 +413,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_agent(float addrspace(1)*
|
|||
; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
||||
; GFX908-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX908-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX908-NEXT: s_cbranch_execnz BB4_1
|
||||
; GFX908-NEXT: s_cbranch_execnz .LBB4_1
|
||||
; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX908-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX908-NEXT: global_store_dword v[0:1], v1, off
|
||||
|
@ -440,7 +440,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_agent(float addrspace(1)*
|
|||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-NEXT: s_mov_b32 s2, 0
|
||||
; GFX10-NEXT: BB4_1: ; %atomicrmw.start
|
||||
; GFX10-NEXT: .LBB4_1: ; %atomicrmw.start
|
||||
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GFX10-NEXT: v_add_f32_e32 v1, 4.0, v2
|
||||
|
@ -453,7 +453,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_agent(float addrspace(1)*
|
|||
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
||||
; GFX10-NEXT: s_or_b32 s2, vcc_lo, s2
|
||||
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-NEXT: s_cbranch_execnz BB4_1
|
||||
; GFX10-NEXT: s_cbranch_execnz .LBB4_1
|
||||
; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-NEXT: global_store_dword v[0:1], v1, off
|
||||
|
@ -473,7 +473,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(float addrspace(1)*
|
|||
; GFX900-NEXT: s_load_dword s4, s[0:1], 0x0
|
||||
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX900-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX900-NEXT: BB5_1: ; %atomicrmw.start
|
||||
; GFX900-NEXT: .LBB5_1: ; %atomicrmw.start
|
||||
; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX900-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GFX900-NEXT: v_add_f32_e32 v1, 4.0, v2
|
||||
|
@ -484,7 +484,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(float addrspace(1)*
|
|||
; GFX900-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
||||
; GFX900-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX900-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX900-NEXT: s_cbranch_execnz BB5_1
|
||||
; GFX900-NEXT: s_cbranch_execnz .LBB5_1
|
||||
; GFX900-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX900-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX900-NEXT: global_store_dword v[0:1], v1, off
|
||||
|
@ -499,7 +499,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(float addrspace(1)*
|
|||
; GFX908-NEXT: s_load_dword s4, s[0:1], 0x0
|
||||
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX908-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX908-NEXT: BB5_1: ; %atomicrmw.start
|
||||
; GFX908-NEXT: .LBB5_1: ; %atomicrmw.start
|
||||
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX908-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GFX908-NEXT: v_add_f32_e32 v1, 4.0, v2
|
||||
|
@ -510,7 +510,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(float addrspace(1)*
|
|||
; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
||||
; GFX908-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX908-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX908-NEXT: s_cbranch_execnz BB5_1
|
||||
; GFX908-NEXT: s_cbranch_execnz .LBB5_1
|
||||
; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX908-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX908-NEXT: global_store_dword v[0:1], v1, off
|
||||
|
@ -525,7 +525,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(float addrspace(1)*
|
|||
; GFX90A-NEXT: s_load_dword s4, s[0:1], 0x0
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX90A-NEXT: BB5_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB5_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
|
||||
; GFX90A-NEXT: v_add_f32_e32 v2, 4.0, v3
|
||||
|
@ -538,7 +538,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(float addrspace(1)*
|
|||
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
|
||||
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX90A-NEXT: s_cbranch_execnz BB5_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB5_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX90A-NEXT: global_store_dword v[0:1], v1, off
|
||||
|
@ -553,7 +553,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(float addrspace(1)*
|
|||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-NEXT: s_mov_b32 s2, 0
|
||||
; GFX10-NEXT: BB5_1: ; %atomicrmw.start
|
||||
; GFX10-NEXT: .LBB5_1: ; %atomicrmw.start
|
||||
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GFX10-NEXT: v_add_f32_e32 v1, 4.0, v2
|
||||
|
@ -566,7 +566,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(float addrspace(1)*
|
|||
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
||||
; GFX10-NEXT: s_or_b32 s2, vcc_lo, s2
|
||||
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-NEXT: s_cbranch_execnz BB5_1
|
||||
; GFX10-NEXT: s_cbranch_execnz .LBB5_1
|
||||
; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-NEXT: global_store_dword v[0:1], v1, off
|
||||
|
@ -586,7 +586,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_wrong_subtarget(float addr
|
|||
; GCN-NEXT: s_load_dword s4, s[0:1], 0x0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GCN-NEXT: BB6_1: ; %atomicrmw.start
|
||||
; GCN-NEXT: .LBB6_1: ; %atomicrmw.start
|
||||
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GCN-NEXT: v_add_f32_e32 v1, 4.0, v2
|
||||
|
@ -597,7 +597,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_wrong_subtarget(float addr
|
|||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
||||
; GCN-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GCN-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GCN-NEXT: s_cbranch_execnz BB6_1
|
||||
; GCN-NEXT: s_cbranch_execnz .LBB6_1
|
||||
; GCN-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GCN-NEXT: global_store_dword v[0:1], v1, off
|
||||
|
@ -632,7 +632,7 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_safe(float addrspace(1)*
|
|||
; GFX900-NEXT: s_load_dword s4, s[0:1], 0x0
|
||||
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX900-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX900-NEXT: BB8_1: ; %atomicrmw.start
|
||||
; GFX900-NEXT: .LBB8_1: ; %atomicrmw.start
|
||||
; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX900-NEXT: v_add_f32_e32 v0, 4.0, v1
|
||||
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
|
@ -643,7 +643,7 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_safe(float addrspace(1)*
|
|||
; GFX900-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX900-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX900-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX900-NEXT: s_cbranch_execnz BB8_1
|
||||
; GFX900-NEXT: s_cbranch_execnz .LBB8_1
|
||||
; GFX900-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX900-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -656,7 +656,7 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_safe(float addrspace(1)*
|
|||
; GFX908-NEXT: s_load_dword s4, s[0:1], 0x0
|
||||
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX908-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX908-NEXT: BB8_1: ; %atomicrmw.start
|
||||
; GFX908-NEXT: .LBB8_1: ; %atomicrmw.start
|
||||
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX908-NEXT: v_add_f32_e32 v0, 4.0, v1
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
|
@ -667,7 +667,7 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_safe(float addrspace(1)*
|
|||
; GFX908-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX908-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX908-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX908-NEXT: s_cbranch_execnz BB8_1
|
||||
; GFX908-NEXT: s_cbranch_execnz .LBB8_1
|
||||
; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX908-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -680,7 +680,7 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_safe(float addrspace(1)*
|
|||
; GFX90A-NEXT: s_load_dword s4, s[0:1], 0x0
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX90A-NEXT: BB8_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB8_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: v_add_f32_e32 v0, 4.0, v1
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
|
@ -691,7 +691,7 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_safe(float addrspace(1)*
|
|||
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX90A-NEXT: s_cbranch_execnz BB8_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB8_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -704,7 +704,7 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_safe(float addrspace(1)*
|
|||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-NEXT: s_mov_b32 s2, 0
|
||||
; GFX10-NEXT: BB8_1: ; %atomicrmw.start
|
||||
; GFX10-NEXT: .LBB8_1: ; %atomicrmw.start
|
||||
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: v_add_f32_e32 v0, 4.0, v1
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
|
@ -717,7 +717,7 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_safe(float addrspace(1)*
|
|||
; GFX10-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX10-NEXT: s_or_b32 s2, vcc_lo, s2
|
||||
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-NEXT: s_cbranch_execnz BB8_1
|
||||
; GFX10-NEXT: s_cbranch_execnz .LBB8_1
|
||||
; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX10-NEXT: s_endpgm
|
||||
%result = atomicrmw fadd float addrspace(1)* %ptr, float 4.0 syncscope("agent") seq_cst
|
||||
|
@ -736,7 +736,7 @@ define amdgpu_kernel void @infer_as_before_atomic(float* addrspace(4)* %arg) #0
|
|||
; GFX900-NEXT: s_load_dword s4, s[0:1], 0x0
|
||||
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX900-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX900-NEXT: BB9_1: ; %atomicrmw.start
|
||||
; GFX900-NEXT: .LBB9_1: ; %atomicrmw.start
|
||||
; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX900-NEXT: v_add_f32_e32 v0, 1.0, v1
|
||||
; GFX900-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
|
||||
|
@ -745,7 +745,7 @@ define amdgpu_kernel void @infer_as_before_atomic(float* addrspace(4)* %arg) #0
|
|||
; GFX900-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX900-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX900-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX900-NEXT: s_cbranch_execnz BB9_1
|
||||
; GFX900-NEXT: s_cbranch_execnz .LBB9_1
|
||||
; GFX900-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX900-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -782,7 +782,7 @@ define amdgpu_kernel void @infer_as_before_atomic(float* addrspace(4)* %arg) #0
|
|||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-NEXT: s_mov_b32 s2, 0
|
||||
; GFX10-NEXT: BB9_1: ; %atomicrmw.start
|
||||
; GFX10-NEXT: .LBB9_1: ; %atomicrmw.start
|
||||
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: v_add_f32_e32 v0, 1.0, v1
|
||||
; GFX10-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
|
||||
|
@ -791,7 +791,7 @@ define amdgpu_kernel void @infer_as_before_atomic(float* addrspace(4)* %arg) #0
|
|||
; GFX10-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX10-NEXT: s_or_b32 s2, vcc_lo, s2
|
||||
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-NEXT: s_cbranch_execnz BB9_1
|
||||
; GFX10-NEXT: s_cbranch_execnz .LBB9_1
|
||||
; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX10-NEXT: s_endpgm
|
||||
%load = load float*, float* addrspace(4)* %arg
|
||||
|
|
|
@ -23,7 +23,7 @@ define amdgpu_kernel void @test_move_load_address_to_vgpr(i32 addrspace(1)* noca
|
|||
; GCN-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
|
||||
; GCN-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0
|
||||
; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
|
||||
; GCN-NEXT: BB0_1: ; %bb3
|
||||
; GCN-NEXT: .LBB0_1: ; %bb3
|
||||
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: global_load_dword v3, v[0:1], off glc
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -31,7 +31,7 @@ define amdgpu_kernel void @test_move_load_address_to_vgpr(i32 addrspace(1)* noca
|
|||
; GCN-NEXT: v_add_co_u32_e64 v0, s[0:1], 4, v0
|
||||
; GCN-NEXT: v_addc_co_u32_e64 v1, s[0:1], 0, v1, s[0:1]
|
||||
; GCN-NEXT: s_and_b64 vcc, exec, vcc
|
||||
; GCN-NEXT: s_cbranch_vccz BB0_1
|
||||
; GCN-NEXT: s_cbranch_vccz .LBB0_1
|
||||
; GCN-NEXT: ; %bb.2: ; %bb2
|
||||
; GCN-NEXT: s_endpgm
|
||||
bb:
|
||||
|
@ -61,7 +61,7 @@ define amdgpu_kernel void @test_move_load_address_to_vgpr_d16_hi(i16 addrspace(1
|
|||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: global_load_ushort v0, v1, s[0:1] glc
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: BB1_1: ; %bb3
|
||||
; GCN-NEXT: .LBB1_1: ; %bb3
|
||||
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1]
|
||||
|
@ -72,7 +72,7 @@ define amdgpu_kernel void @test_move_load_address_to_vgpr_d16_hi(i16 addrspace(1
|
|||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s2, v0
|
||||
; GCN-NEXT: s_and_b64 vcc, exec, vcc
|
||||
; GCN-NEXT: s_cbranch_vccz BB1_1
|
||||
; GCN-NEXT: s_cbranch_vccz .LBB1_1
|
||||
; GCN-NEXT: ; %bb.2: ; %bb2
|
||||
; GCN-NEXT: s_endpgm
|
||||
bb:
|
||||
|
|
|
@ -2389,7 +2389,7 @@ define amdgpu_ps void @global_addr_64bit_lsr_iv(float addrspace(1)* inreg %arg)
|
|||
; GFX9: ; %bb.0: ; %bb
|
||||
; GFX9-NEXT: s_mov_b64 s[0:1], 0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX9-NEXT: BB128_1: ; %bb3
|
||||
; GFX9-NEXT: .LBB128_1: ; %bb3
|
||||
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX9-NEXT: s_add_u32 s4, s2, s0
|
||||
; GFX9-NEXT: s_addc_u32 s5, s3, s1
|
||||
|
@ -2398,7 +2398,7 @@ define amdgpu_ps void @global_addr_64bit_lsr_iv(float addrspace(1)* inreg %arg)
|
|||
; GFX9-NEXT: s_add_u32 s0, s0, 4
|
||||
; GFX9-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX9-NEXT: s_cmpk_eq_i32 s0, 0x400
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB128_1
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB128_1
|
||||
; GFX9-NEXT: ; %bb.2: ; %bb2
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -2406,7 +2406,7 @@ define amdgpu_ps void @global_addr_64bit_lsr_iv(float addrspace(1)* inreg %arg)
|
|||
; GFX10: ; %bb.0: ; %bb
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX10-NEXT: s_mov_b64 s[0:1], 0
|
||||
; GFX10-NEXT: BB128_1: ; %bb3
|
||||
; GFX10-NEXT: .LBB128_1: ; %bb3
|
||||
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX10-NEXT: s_add_u32 s4, s2, s0
|
||||
|
@ -2416,7 +2416,7 @@ define amdgpu_ps void @global_addr_64bit_lsr_iv(float addrspace(1)* inreg %arg)
|
|||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX10-NEXT: s_cmpk_eq_i32 s0, 0x400
|
||||
; GFX10-NEXT: s_cbranch_scc0 BB128_1
|
||||
; GFX10-NEXT: s_cbranch_scc0 .LBB128_1
|
||||
; GFX10-NEXT: ; %bb.2: ; %bb2
|
||||
; GFX10-NEXT: s_endpgm
|
||||
bb:
|
||||
|
@ -2442,7 +2442,7 @@ define amdgpu_ps void @global_addr_64bit_lsr_iv_multiload(float addrspace(1)* in
|
|||
; GFX9: ; %bb.0: ; %bb
|
||||
; GFX9-NEXT: s_mov_b64 s[0:1], 0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX9-NEXT: BB129_1: ; %bb3
|
||||
; GFX9-NEXT: .LBB129_1: ; %bb3
|
||||
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX9-NEXT: s_add_u32 s4, s2, s0
|
||||
; GFX9-NEXT: s_addc_u32 s5, s3, s1
|
||||
|
@ -2454,7 +2454,7 @@ define amdgpu_ps void @global_addr_64bit_lsr_iv_multiload(float addrspace(1)* in
|
|||
; GFX9-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX9-NEXT: s_cmpk_eq_i32 s0, 0x400
|
||||
; GFX9-NEXT: ; kill: killed $sgpr4 killed $sgpr5
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB129_1
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB129_1
|
||||
; GFX9-NEXT: ; %bb.2: ; %bb2
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -2462,7 +2462,7 @@ define amdgpu_ps void @global_addr_64bit_lsr_iv_multiload(float addrspace(1)* in
|
|||
; GFX10: ; %bb.0: ; %bb
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX10-NEXT: s_mov_b64 s[0:1], 0
|
||||
; GFX10-NEXT: BB129_1: ; %bb3
|
||||
; GFX10-NEXT: .LBB129_1: ; %bb3
|
||||
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX10-NEXT: s_add_u32 s4, s2, s0
|
||||
|
@ -2475,7 +2475,7 @@ define amdgpu_ps void @global_addr_64bit_lsr_iv_multiload(float addrspace(1)* in
|
|||
; GFX10-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX10-NEXT: s_cmpk_eq_i32 s0, 0x400
|
||||
; GFX10-NEXT: ; kill: killed $sgpr4 killed $sgpr5
|
||||
; GFX10-NEXT: s_cbranch_scc0 BB129_1
|
||||
; GFX10-NEXT: s_cbranch_scc0 .LBB129_1
|
||||
; GFX10-NEXT: ; %bb.2: ; %bb2
|
||||
; GFX10-NEXT: s_endpgm
|
||||
bb:
|
||||
|
|
|
@ -8,10 +8,10 @@ define amdgpu_ps void @i1_copy_from_loop(<4 x i32> inreg %rsrc, i32 %tid) {
|
|||
; SI-NEXT: s_mov_b64 s[4:5], 0
|
||||
; SI-NEXT: ; implicit-def: $sgpr6_sgpr7
|
||||
; SI-NEXT: ; implicit-def: $sgpr8_sgpr9
|
||||
; SI-NEXT: s_branch BB0_3
|
||||
; SI-NEXT: BB0_1: ; in Loop: Header=BB0_3 Depth=1
|
||||
; SI-NEXT: s_branch .LBB0_3
|
||||
; SI-NEXT: .LBB0_1: ; in Loop: Header=BB0_3 Depth=1
|
||||
; SI-NEXT: ; implicit-def: $sgpr14
|
||||
; SI-NEXT: BB0_2: ; %Flow
|
||||
; SI-NEXT: .LBB0_2: ; %Flow
|
||||
; SI-NEXT: ; in Loop: Header=BB0_3 Depth=1
|
||||
; SI-NEXT: s_and_b64 s[12:13], exec, s[8:9]
|
||||
; SI-NEXT: s_or_b64 s[4:5], s[12:13], s[4:5]
|
||||
|
@ -19,14 +19,14 @@ define amdgpu_ps void @i1_copy_from_loop(<4 x i32> inreg %rsrc, i32 %tid) {
|
|||
; SI-NEXT: s_and_b64 s[10:11], s[10:11], exec
|
||||
; SI-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11]
|
||||
; SI-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; SI-NEXT: s_cbranch_execz BB0_7
|
||||
; SI-NEXT: BB0_3: ; %for.body
|
||||
; SI-NEXT: s_cbranch_execz .LBB0_7
|
||||
; SI-NEXT: .LBB0_3: ; %for.body
|
||||
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; SI-NEXT: s_cmp_lt_u32 s14, 4
|
||||
; SI-NEXT: s_cselect_b64 s[10:11], -1, 0
|
||||
; SI-NEXT: s_or_b64 s[8:9], s[8:9], exec
|
||||
; SI-NEXT: s_cmp_gt_u32 s14, 3
|
||||
; SI-NEXT: s_cbranch_scc1 BB0_1
|
||||
; SI-NEXT: s_cbranch_scc1 .LBB0_1
|
||||
; SI-NEXT: ; %bb.4: ; %mid.loop
|
||||
; SI-NEXT: ; in Loop: Header=BB0_3 Depth=1
|
||||
; SI-NEXT: v_mov_b32_e32 v1, s14
|
||||
|
@ -43,14 +43,14 @@ define amdgpu_ps void @i1_copy_from_loop(<4 x i32> inreg %rsrc, i32 %tid) {
|
|||
; SI-NEXT: ; %bb.6: ; %Flow1
|
||||
; SI-NEXT: ; in Loop: Header=BB0_3 Depth=1
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[12:13]
|
||||
; SI-NEXT: s_branch BB0_2
|
||||
; SI-NEXT: BB0_7: ; %for.end
|
||||
; SI-NEXT: s_branch .LBB0_2
|
||||
; SI-NEXT: .LBB0_7: ; %for.end
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; SI-NEXT: s_and_saveexec_b64 s[0:1], s[6:7]
|
||||
; SI-NEXT: s_cbranch_execz BB0_9
|
||||
; SI-NEXT: s_cbranch_execz .LBB0_9
|
||||
; SI-NEXT: ; %bb.8: ; %if
|
||||
; SI-NEXT: exp mrt0 v0, v0, v0, v0 done vm
|
||||
; SI-NEXT: BB0_9: ; %end
|
||||
; SI-NEXT: .LBB0_9: ; %end
|
||||
; SI-NEXT: s_endpgm
|
||||
entry:
|
||||
br label %for.body
|
||||
|
|
|
@ -4,11 +4,11 @@
|
|||
|
||||
; GCN: ; %entry
|
||||
; GCN: s_cmp_eq_u32 s0, 0
|
||||
; GCN: s_cbranch_scc1 [[EXIT:BB[0-9_]+]]
|
||||
; GCN: s_cbranch_scc1 [[EXIT:.LBB[0-9_]+]]
|
||||
|
||||
; GCN: ; %blocka
|
||||
; GCN: s_cmp_eq_u32 s1, 0
|
||||
; GCN: s_cbranch_scc1 [[PREEXIT:BB[0-9_]+]]
|
||||
; GCN: s_cbranch_scc1 [[PREEXIT:.LBB[0-9_]+]]
|
||||
|
||||
; GCN: [[PREEXIT]]:
|
||||
; GCN: [[EXIT]]:
|
||||
|
|
|
@ -18,7 +18,7 @@ define amdgpu_kernel void @udiv32_invariant_denom(i32 addrspace(1)* nocapture %a
|
|||
; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1
|
||||
; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX9-NEXT: BB0_1: ; %bb3
|
||||
; GFX9-NEXT: .LBB0_1: ; %bb3
|
||||
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX9-NEXT: v_mul_lo_u32 v2, s3, v0
|
||||
; GFX9-NEXT: v_mul_hi_u32 v3, s2, v0
|
||||
|
@ -41,7 +41,7 @@ define amdgpu_kernel void @udiv32_invariant_denom(i32 addrspace(1)* nocapture %a
|
|||
; GFX9-NEXT: s_add_u32 s0, s0, 4
|
||||
; GFX9-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX9-NEXT: s_cmpk_eq_i32 s2, 0x400
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB0_1
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB0_1
|
||||
; GFX9-NEXT: ; %bb.2: ; %bb2
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -60,7 +60,7 @@ define amdgpu_kernel void @udiv32_invariant_denom(i32 addrspace(1)* nocapture %a
|
|||
; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1
|
||||
; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX10-NEXT: BB0_1: ; %bb3
|
||||
; GFX10-NEXT: .LBB0_1: ; %bb3
|
||||
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: v_mul_lo_u32 v2, s3, v0
|
||||
; GFX10-NEXT: v_mul_hi_u32 v3, s2, v0
|
||||
|
@ -84,7 +84,7 @@ define amdgpu_kernel void @udiv32_invariant_denom(i32 addrspace(1)* nocapture %a
|
|||
; GFX10-NEXT: s_add_u32 s0, s0, 4
|
||||
; GFX10-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX10-NEXT: s_cmpk_eq_i32 s2, 0x400
|
||||
; GFX10-NEXT: s_cbranch_scc0 BB0_1
|
||||
; GFX10-NEXT: s_cbranch_scc0 .LBB0_1
|
||||
; GFX10-NEXT: ; %bb.2: ; %bb2
|
||||
; GFX10-NEXT: s_endpgm
|
||||
bb:
|
||||
|
@ -120,7 +120,7 @@ define amdgpu_kernel void @urem32_invariant_denom(i32 addrspace(1)* nocapture %a
|
|||
; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1
|
||||
; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX9-NEXT: BB1_1: ; %bb3
|
||||
; GFX9-NEXT: .LBB1_1: ; %bb3
|
||||
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX9-NEXT: v_mul_lo_u32 v2, s3, v0
|
||||
; GFX9-NEXT: v_mul_hi_u32 v3, s2, v0
|
||||
|
@ -141,7 +141,7 @@ define amdgpu_kernel void @urem32_invariant_denom(i32 addrspace(1)* nocapture %a
|
|||
; GFX9-NEXT: s_add_u32 s0, s0, 4
|
||||
; GFX9-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX9-NEXT: s_cmpk_eq_i32 s2, 0x400
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB1_1
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB1_1
|
||||
; GFX9-NEXT: ; %bb.2: ; %bb2
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -160,7 +160,7 @@ define amdgpu_kernel void @urem32_invariant_denom(i32 addrspace(1)* nocapture %a
|
|||
; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1
|
||||
; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX10-NEXT: BB1_1: ; %bb3
|
||||
; GFX10-NEXT: .LBB1_1: ; %bb3
|
||||
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: v_mul_lo_u32 v2, s3, v0
|
||||
; GFX10-NEXT: v_mul_hi_u32 v3, s2, v0
|
||||
|
@ -182,7 +182,7 @@ define amdgpu_kernel void @urem32_invariant_denom(i32 addrspace(1)* nocapture %a
|
|||
; GFX10-NEXT: s_add_u32 s0, s0, 4
|
||||
; GFX10-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX10-NEXT: s_cmpk_eq_i32 s2, 0x400
|
||||
; GFX10-NEXT: s_cbranch_scc0 BB1_1
|
||||
; GFX10-NEXT: s_cbranch_scc0 .LBB1_1
|
||||
; GFX10-NEXT: ; %bb.2: ; %bb2
|
||||
; GFX10-NEXT: s_endpgm
|
||||
bb:
|
||||
|
@ -221,7 +221,7 @@ define amdgpu_kernel void @sdiv32_invariant_denom(i32 addrspace(1)* nocapture %a
|
|||
; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1
|
||||
; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX9-NEXT: BB2_1: ; %bb3
|
||||
; GFX9-NEXT: .LBB2_1: ; %bb3
|
||||
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX9-NEXT: v_mul_hi_u32 v2, s4, v0
|
||||
; GFX9-NEXT: v_mul_lo_u32 v3, v2, s3
|
||||
|
@ -242,7 +242,7 @@ define amdgpu_kernel void @sdiv32_invariant_denom(i32 addrspace(1)* nocapture %a
|
|||
; GFX9-NEXT: s_add_u32 s0, s0, 4
|
||||
; GFX9-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX9-NEXT: s_cmpk_eq_i32 s4, 0x400
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB2_1
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB2_1
|
||||
; GFX9-NEXT: ; %bb.2: ; %bb2
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -264,7 +264,7 @@ define amdgpu_kernel void @sdiv32_invariant_denom(i32 addrspace(1)* nocapture %a
|
|||
; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1
|
||||
; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX10-NEXT: BB2_1: ; %bb3
|
||||
; GFX10-NEXT: .LBB2_1: ; %bb3
|
||||
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: v_mul_hi_u32 v2, s4, v0
|
||||
; GFX10-NEXT: v_mul_lo_u32 v3, v2, s3
|
||||
|
@ -286,7 +286,7 @@ define amdgpu_kernel void @sdiv32_invariant_denom(i32 addrspace(1)* nocapture %a
|
|||
; GFX10-NEXT: s_add_u32 s0, s0, 4
|
||||
; GFX10-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX10-NEXT: s_cmpk_eq_i32 s4, 0x400
|
||||
; GFX10-NEXT: s_cbranch_scc0 BB2_1
|
||||
; GFX10-NEXT: s_cbranch_scc0 .LBB2_1
|
||||
; GFX10-NEXT: ; %bb.2: ; %bb2
|
||||
; GFX10-NEXT: s_endpgm
|
||||
bb:
|
||||
|
@ -325,7 +325,7 @@ define amdgpu_kernel void @srem32_invariant_denom(i32 addrspace(1)* nocapture %a
|
|||
; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1
|
||||
; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX9-NEXT: BB3_1: ; %bb3
|
||||
; GFX9-NEXT: .LBB3_1: ; %bb3
|
||||
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX9-NEXT: v_mul_hi_u32 v2, s3, v0
|
||||
; GFX9-NEXT: v_mul_lo_u32 v2, v2, s2
|
||||
|
@ -342,7 +342,7 @@ define amdgpu_kernel void @srem32_invariant_denom(i32 addrspace(1)* nocapture %a
|
|||
; GFX9-NEXT: s_add_u32 s0, s0, 4
|
||||
; GFX9-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX9-NEXT: s_cmpk_eq_i32 s3, 0x400
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB3_1
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB3_1
|
||||
; GFX9-NEXT: ; %bb.2: ; %bb2
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -364,7 +364,7 @@ define amdgpu_kernel void @srem32_invariant_denom(i32 addrspace(1)* nocapture %a
|
|||
; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1
|
||||
; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX10-NEXT: BB3_1: ; %bb3
|
||||
; GFX10-NEXT: .LBB3_1: ; %bb3
|
||||
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: v_mul_hi_u32 v2, s3, v0
|
||||
; GFX10-NEXT: v_mul_lo_u32 v2, v2, s2
|
||||
|
@ -382,7 +382,7 @@ define amdgpu_kernel void @srem32_invariant_denom(i32 addrspace(1)* nocapture %a
|
|||
; GFX10-NEXT: s_add_u32 s0, s0, 4
|
||||
; GFX10-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX10-NEXT: s_cmpk_eq_i32 s3, 0x400
|
||||
; GFX10-NEXT: s_cbranch_scc0 BB3_1
|
||||
; GFX10-NEXT: s_cbranch_scc0 .LBB3_1
|
||||
; GFX10-NEXT: ; %bb.2: ; %bb2
|
||||
; GFX10-NEXT: s_endpgm
|
||||
bb:
|
||||
|
@ -415,7 +415,7 @@ define amdgpu_kernel void @udiv16_invariant_denom(i16 addrspace(1)* nocapture %a
|
|||
; GFX9-NEXT: v_cvt_f32_u32_e32 v2, s2
|
||||
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
|
||||
; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v2
|
||||
; GFX9-NEXT: BB4_1: ; %bb3
|
||||
; GFX9-NEXT: .LBB4_1: ; %bb3
|
||||
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX9-NEXT: v_and_b32_e32 v0, s4, v4
|
||||
; GFX9-NEXT: v_cvt_f32_u32_e32 v8, v0
|
||||
|
@ -434,7 +434,7 @@ define amdgpu_kernel void @udiv16_invariant_denom(i16 addrspace(1)* nocapture %a
|
|||
; GFX9-NEXT: s_and_b64 vcc, exec, vcc
|
||||
; GFX9-NEXT: v_addc_co_u32_e64 v0, s[0:1], 0, v7, s[0:1]
|
||||
; GFX9-NEXT: global_store_short v[5:6], v0, off
|
||||
; GFX9-NEXT: s_cbranch_vccz BB4_1
|
||||
; GFX9-NEXT: s_cbranch_vccz .LBB4_1
|
||||
; GFX9-NEXT: ; %bb.2: ; %bb2
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -450,7 +450,7 @@ define amdgpu_kernel void @udiv16_invariant_denom(i16 addrspace(1)* nocapture %a
|
|||
; GFX10-NEXT: s_and_b32 s0, s1, s4
|
||||
; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s0
|
||||
; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v2
|
||||
; GFX10-NEXT: BB4_1: ; %bb3
|
||||
; GFX10-NEXT: .LBB4_1: ; %bb3
|
||||
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, s1, v4
|
||||
; GFX10-NEXT: v_add_nc_u16 v4, v4, 1
|
||||
|
@ -467,7 +467,7 @@ define amdgpu_kernel void @udiv16_invariant_denom(i16 addrspace(1)* nocapture %a
|
|||
; GFX10-NEXT: v_cmp_ge_f32_e64 s0, |v7|, v2
|
||||
; GFX10-NEXT: v_add_co_ci_u32_e64 v0, s0, 0, v0, s0
|
||||
; GFX10-NEXT: global_store_short v[5:6], v0, off
|
||||
; GFX10-NEXT: s_cbranch_vccz BB4_1
|
||||
; GFX10-NEXT: s_cbranch_vccz .LBB4_1
|
||||
; GFX10-NEXT: ; %bb.2: ; %bb2
|
||||
; GFX10-NEXT: s_endpgm
|
||||
bb:
|
||||
|
@ -500,7 +500,7 @@ define amdgpu_kernel void @urem16_invariant_denom(i16 addrspace(1)* nocapture %a
|
|||
; GFX9-NEXT: v_cvt_f32_u32_e32 v2, s7
|
||||
; GFX9-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v2
|
||||
; GFX9-NEXT: BB5_1: ; %bb3
|
||||
; GFX9-NEXT: .LBB5_1: ; %bb3
|
||||
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX9-NEXT: v_and_b32_e32 v0, s6, v4
|
||||
; GFX9-NEXT: v_cvt_f32_u32_e32 v8, v0
|
||||
|
@ -520,7 +520,7 @@ define amdgpu_kernel void @urem16_invariant_denom(i16 addrspace(1)* nocapture %a
|
|||
; GFX9-NEXT: v_addc_co_u32_e64 v6, s[0:1], v7, v6, s[0:1]
|
||||
; GFX9-NEXT: v_sub_u32_e32 v0, v0, v8
|
||||
; GFX9-NEXT: global_store_short v[5:6], v0, off
|
||||
; GFX9-NEXT: s_cbranch_vccz BB5_1
|
||||
; GFX9-NEXT: s_cbranch_vccz .LBB5_1
|
||||
; GFX9-NEXT: ; %bb.2: ; %bb2
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -536,7 +536,7 @@ define amdgpu_kernel void @urem16_invariant_denom(i16 addrspace(1)* nocapture %a
|
|||
; GFX10-NEXT: s_and_b32 s4, s1, s4
|
||||
; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s4
|
||||
; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v2
|
||||
; GFX10-NEXT: BB5_1: ; %bb3
|
||||
; GFX10-NEXT: .LBB5_1: ; %bb3
|
||||
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, s1, v4
|
||||
; GFX10-NEXT: v_add_nc_u16 v4, v4, 1
|
||||
|
@ -555,7 +555,7 @@ define amdgpu_kernel void @urem16_invariant_denom(i16 addrspace(1)* nocapture %a
|
|||
; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo
|
||||
; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v7
|
||||
; GFX10-NEXT: global_store_short v[5:6], v0, off
|
||||
; GFX10-NEXT: s_cbranch_vccz BB5_1
|
||||
; GFX10-NEXT: s_cbranch_vccz .LBB5_1
|
||||
; GFX10-NEXT: ; %bb.2: ; %bb2
|
||||
; GFX10-NEXT: s_endpgm
|
||||
bb:
|
||||
|
@ -587,7 +587,7 @@ define amdgpu_kernel void @sdiv16_invariant_denom(i16 addrspace(1)* nocapture %a
|
|||
; GFX9-NEXT: v_cvt_f32_i32_e32 v2, s4
|
||||
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
|
||||
; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v2
|
||||
; GFX9-NEXT: BB6_1: ; %bb3
|
||||
; GFX9-NEXT: .LBB6_1: ; %bb3
|
||||
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX9-NEXT: v_bfe_i32 v5, v4, 0, 16
|
||||
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v4
|
||||
|
@ -611,7 +611,7 @@ define amdgpu_kernel void @sdiv16_invariant_denom(i16 addrspace(1)* nocapture %a
|
|||
; GFX9-NEXT: s_and_b64 vcc, exec, vcc
|
||||
; GFX9-NEXT: v_add_u32_e32 v0, v8, v0
|
||||
; GFX9-NEXT: global_store_short v[5:6], v0, off
|
||||
; GFX9-NEXT: s_cbranch_vccz BB6_1
|
||||
; GFX9-NEXT: s_cbranch_vccz .LBB6_1
|
||||
; GFX9-NEXT: ; %bb.2: ; %bb2
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -626,7 +626,7 @@ define amdgpu_kernel void @sdiv16_invariant_denom(i16 addrspace(1)* nocapture %a
|
|||
; GFX10-NEXT: s_sext_i32_i16 s4, s4
|
||||
; GFX10-NEXT: v_cvt_f32_i32_e32 v2, s4
|
||||
; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v2
|
||||
; GFX10-NEXT: BB6_1: ; %bb3
|
||||
; GFX10-NEXT: .LBB6_1: ; %bb3
|
||||
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: v_bfe_i32 v5, v4, 0, 16
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v4
|
||||
|
@ -648,7 +648,7 @@ define amdgpu_kernel void @sdiv16_invariant_denom(i16 addrspace(1)* nocapture %a
|
|||
; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, v8, s1
|
||||
; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v7
|
||||
; GFX10-NEXT: global_store_short v[5:6], v0, off
|
||||
; GFX10-NEXT: s_cbranch_vccz BB6_1
|
||||
; GFX10-NEXT: s_cbranch_vccz .LBB6_1
|
||||
; GFX10-NEXT: ; %bb.2: ; %bb2
|
||||
; GFX10-NEXT: s_endpgm
|
||||
bb:
|
||||
|
@ -680,7 +680,7 @@ define amdgpu_kernel void @srem16_invariant_denom(i16 addrspace(1)* nocapture %a
|
|||
; GFX9-NEXT: s_sext_i32_i16 s6, s2
|
||||
; GFX9-NEXT: v_cvt_f32_i32_e32 v2, s6
|
||||
; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v2
|
||||
; GFX9-NEXT: BB7_1: ; %bb3
|
||||
; GFX9-NEXT: .LBB7_1: ; %bb3
|
||||
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX9-NEXT: v_bfe_i32 v7, v4, 0, 16
|
||||
; GFX9-NEXT: v_cvt_f32_i32_e32 v10, v7
|
||||
|
@ -705,7 +705,7 @@ define amdgpu_kernel void @srem16_invariant_denom(i16 addrspace(1)* nocapture %a
|
|||
; GFX9-NEXT: v_addc_co_u32_e64 v6, s[0:1], v8, v6, s[0:1]
|
||||
; GFX9-NEXT: v_sub_u32_e32 v0, v7, v0
|
||||
; GFX9-NEXT: global_store_short v[5:6], v0, off
|
||||
; GFX9-NEXT: s_cbranch_vccz BB7_1
|
||||
; GFX9-NEXT: s_cbranch_vccz .LBB7_1
|
||||
; GFX9-NEXT: ; %bb.2: ; %bb2
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -720,7 +720,7 @@ define amdgpu_kernel void @srem16_invariant_denom(i16 addrspace(1)* nocapture %a
|
|||
; GFX10-NEXT: s_sext_i32_i16 s1, s4
|
||||
; GFX10-NEXT: v_cvt_f32_i32_e32 v2, s1
|
||||
; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v2
|
||||
; GFX10-NEXT: BB7_1: ; %bb3
|
||||
; GFX10-NEXT: .LBB7_1: ; %bb3
|
||||
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: v_bfe_i32 v7, v4, 0, 16
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v4
|
||||
|
@ -744,7 +744,7 @@ define amdgpu_kernel void @srem16_invariant_denom(i16 addrspace(1)* nocapture %a
|
|||
; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo
|
||||
; GFX10-NEXT: v_sub_nc_u32_e32 v0, v7, v0
|
||||
; GFX10-NEXT: global_store_short v[5:6], v0, off
|
||||
; GFX10-NEXT: s_cbranch_vccz BB7_1
|
||||
; GFX10-NEXT: s_cbranch_vccz .LBB7_1
|
||||
; GFX10-NEXT: ; %bb.2: ; %bb2
|
||||
; GFX10-NEXT: s_endpgm
|
||||
bb:
|
||||
|
|
|
@ -5,7 +5,7 @@ declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32, float, float, <
|
|||
|
||||
; GCN-LABEL: {{^}}water_loop_rsrc:
|
||||
|
||||
; GCN: [[RSRC_LOOP:[a-zA-Z0-9_]+]]: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN: [[RSRC_LOOP:.L[a-zA-Z0-9_]+]]: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: v_readfirstlane_b32 s[[SREG0:[0-9]+]], v[[VREG0:[0-9]+]]
|
||||
; GCN-NEXT: v_readfirstlane_b32 s[[SREG1:[0-9]+]], v[[VREG1:[0-9]+]]
|
||||
; GCN-NEXT: v_readfirstlane_b32 s[[SREG2:[0-9]+]], v[[VREG2:[0-9]+]]
|
||||
|
@ -37,7 +37,7 @@ main_body:
|
|||
|
||||
; GCN-LABEL: {{^}}water_loop_samp:
|
||||
|
||||
; GCN: [[SAMP_LOOP:[a-zA-Z0-9_]+]]: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN: [[SAMP_LOOP:.L[a-zA-Z0-9_]+]]: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: v_readfirstlane_b32 s[[SREG0:[0-9]+]], v[[VREG0:[0-9]+]]
|
||||
; GCN-NEXT: v_readfirstlane_b32 s[[SREG1:[0-9]+]], v[[VREG1:[0-9]+]]
|
||||
; GCN-NEXT: v_readfirstlane_b32 s[[SREG2:[0-9]+]], v[[VREG2:[0-9]+]]
|
||||
|
|
|
@ -26,11 +26,11 @@ entry:
|
|||
; CHECK-LABEL: {{^}}extract_adjacent_blocks:
|
||||
; CHECK: s_load_dword [[ARG:s[0-9]+]]
|
||||
; CHECK: s_cmp_lg_u32
|
||||
; CHECK: s_cbranch_scc1 [[BB4:BB[0-9]+_[0-9]+]]
|
||||
; CHECK: s_cbranch_scc1 [[BB4:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; CHECK: buffer_load_dwordx4
|
||||
|
||||
; CHECK: s_branch [[ENDBB:BB[0-9]+_[0-9]+]]
|
||||
; CHECK: s_branch [[ENDBB:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; CHECK: [[BB4]]:
|
||||
; CHECK: buffer_load_dwordx4
|
||||
|
|
|
@ -508,12 +508,12 @@ define amdgpu_kernel void @insertelement_v16f32_or_index(<16 x float> addrspace(
|
|||
; GCN-LABEL: {{^}}broken_phi_bb:
|
||||
; GCN: v_mov_b32_e32 [[PHIREG:v[0-9]+]], 8
|
||||
|
||||
; GCN: {{BB[0-9]+_[0-9]+}}:
|
||||
; GCN: [[BB2:BB[0-9]+_[0-9]+]]:
|
||||
; GCN: {{.LBB[0-9]+_[0-9]+}}:
|
||||
; GCN: [[BB2:.LBB[0-9]+_[0-9]+]]:
|
||||
; GCN: v_cmp_le_i32_e32 vcc, s{{[0-9]+}}, [[PHIREG]]
|
||||
; GCN: buffer_load_dword
|
||||
|
||||
; GCN: [[REGLOOP:BB[0-9]+_[0-9]+]]:
|
||||
; GCN: [[REGLOOP:.LBB[0-9]+_[0-9]+]]:
|
||||
; MOVREL: v_movreld_b32_e32
|
||||
|
||||
; IDXMODE: s_set_gpr_idx_on
|
||||
|
|
|
@ -424,7 +424,7 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
|
|||
; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
|
||||
; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
|
||||
; GCN-NEXT: s_mov_b64 s[46:47], exec
|
||||
; GCN-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: v_readfirstlane_b32 s16, v0
|
||||
; GCN-NEXT: v_readfirstlane_b32 s17, v1
|
||||
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
|
||||
|
@ -440,7 +440,7 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
|
|||
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GCN-NEXT: ; implicit-def: $vgpr31
|
||||
; GCN-NEXT: s_xor_b64 exec, exec, s[48:49]
|
||||
; GCN-NEXT: s_cbranch_execnz BB2_1
|
||||
; GCN-NEXT: s_cbranch_execnz .LBB2_1
|
||||
; GCN-NEXT: ; %bb.2:
|
||||
; GCN-NEXT: s_mov_b64 exec, s[46:47]
|
||||
; GCN-NEXT: v_readlane_b32 s4, v40, 15
|
||||
|
@ -502,7 +502,7 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
|
|||
; GISEL-NEXT: v_writelane_b32 v40, s30, 15
|
||||
; GISEL-NEXT: v_writelane_b32 v40, s31, 16
|
||||
; GISEL-NEXT: s_mov_b64 s[46:47], exec
|
||||
; GISEL-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GISEL-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s16, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s17, v1
|
||||
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
|
||||
|
@ -518,7 +518,7 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
|
|||
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GISEL-NEXT: ; implicit-def: $vgpr31
|
||||
; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
|
||||
; GISEL-NEXT: s_cbranch_execnz BB2_1
|
||||
; GISEL-NEXT: s_cbranch_execnz .LBB2_1
|
||||
; GISEL-NEXT: ; %bb.2:
|
||||
; GISEL-NEXT: s_mov_b64 exec, s[46:47]
|
||||
; GISEL-NEXT: v_readlane_b32 s4, v40, 15
|
||||
|
@ -585,7 +585,7 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
|
|||
; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
|
||||
; GCN-NEXT: s_mov_b64 s[46:47], exec
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, 0x7b
|
||||
; GCN-NEXT: BB3_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: v_readfirstlane_b32 s16, v0
|
||||
; GCN-NEXT: v_readfirstlane_b32 s17, v1
|
||||
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
|
||||
|
@ -603,7 +603,7 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
|
|||
; GCN-NEXT: ; implicit-def: $vgpr31
|
||||
; GCN-NEXT: ; implicit-def: $vgpr2
|
||||
; GCN-NEXT: s_xor_b64 exec, exec, s[48:49]
|
||||
; GCN-NEXT: s_cbranch_execnz BB3_1
|
||||
; GCN-NEXT: s_cbranch_execnz .LBB3_1
|
||||
; GCN-NEXT: ; %bb.2:
|
||||
; GCN-NEXT: s_mov_b64 exec, s[46:47]
|
||||
; GCN-NEXT: v_readlane_b32 s4, v40, 15
|
||||
|
@ -665,7 +665,7 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
|
|||
; GISEL-NEXT: v_writelane_b32 v40, s30, 15
|
||||
; GISEL-NEXT: v_writelane_b32 v40, s31, 16
|
||||
; GISEL-NEXT: s_mov_b64 s[46:47], exec
|
||||
; GISEL-NEXT: BB3_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s16, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s17, v1
|
||||
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
|
||||
|
@ -682,7 +682,7 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
|
|||
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GISEL-NEXT: ; implicit-def: $vgpr31
|
||||
; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
|
||||
; GISEL-NEXT: s_cbranch_execnz BB3_1
|
||||
; GISEL-NEXT: s_cbranch_execnz .LBB3_1
|
||||
; GISEL-NEXT: ; %bb.2:
|
||||
; GISEL-NEXT: s_mov_b64 exec, s[46:47]
|
||||
; GISEL-NEXT: v_readlane_b32 s4, v40, 15
|
||||
|
@ -748,7 +748,7 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
|
|||
; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
|
||||
; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
|
||||
; GCN-NEXT: s_mov_b64 s[46:47], exec
|
||||
; GCN-NEXT: BB4_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: v_readfirstlane_b32 s16, v0
|
||||
; GCN-NEXT: v_readfirstlane_b32 s17, v1
|
||||
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
|
||||
|
@ -765,7 +765,7 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
|
|||
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GCN-NEXT: ; implicit-def: $vgpr31
|
||||
; GCN-NEXT: s_xor_b64 exec, exec, s[48:49]
|
||||
; GCN-NEXT: s_cbranch_execnz BB4_1
|
||||
; GCN-NEXT: s_cbranch_execnz .LBB4_1
|
||||
; GCN-NEXT: ; %bb.2:
|
||||
; GCN-NEXT: s_mov_b64 exec, s[46:47]
|
||||
; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2
|
||||
|
@ -828,7 +828,7 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
|
|||
; GISEL-NEXT: v_writelane_b32 v40, s30, 15
|
||||
; GISEL-NEXT: v_writelane_b32 v40, s31, 16
|
||||
; GISEL-NEXT: s_mov_b64 s[46:47], exec
|
||||
; GISEL-NEXT: BB4_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s16, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s17, v1
|
||||
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
|
||||
|
@ -845,7 +845,7 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
|
|||
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GISEL-NEXT: ; implicit-def: $vgpr31
|
||||
; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
|
||||
; GISEL-NEXT: s_cbranch_execnz BB4_1
|
||||
; GISEL-NEXT: s_cbranch_execnz .LBB4_1
|
||||
; GISEL-NEXT: ; %bb.2:
|
||||
; GISEL-NEXT: s_mov_b64 exec, s[46:47]
|
||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v2
|
||||
|
@ -915,12 +915,12 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
|
|||
; GCN-NEXT: v_and_b32_e32 v2, 1, v2
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
|
||||
; GCN-NEXT: s_and_saveexec_b64 s[46:47], vcc
|
||||
; GCN-NEXT: s_cbranch_execz BB5_4
|
||||
; GCN-NEXT: s_cbranch_execz .LBB5_4
|
||||
; GCN-NEXT: ; %bb.1: ; %bb1
|
||||
; GCN-NEXT: v_writelane_b32 v40, s30, 17
|
||||
; GCN-NEXT: v_writelane_b32 v40, s31, 18
|
||||
; GCN-NEXT: s_mov_b64 s[48:49], exec
|
||||
; GCN-NEXT: BB5_2: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: v_readfirstlane_b32 s16, v0
|
||||
; GCN-NEXT: v_readfirstlane_b32 s17, v1
|
||||
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
|
||||
|
@ -936,12 +936,12 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
|
|||
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GCN-NEXT: ; implicit-def: $vgpr31
|
||||
; GCN-NEXT: s_xor_b64 exec, exec, s[50:51]
|
||||
; GCN-NEXT: s_cbranch_execnz BB5_2
|
||||
; GCN-NEXT: s_cbranch_execnz .LBB5_2
|
||||
; GCN-NEXT: ; %bb.3:
|
||||
; GCN-NEXT: s_mov_b64 exec, s[48:49]
|
||||
; GCN-NEXT: v_readlane_b32 s30, v40, 17
|
||||
; GCN-NEXT: v_readlane_b32 s31, v40, 18
|
||||
; GCN-NEXT: BB5_4: ; %bb2
|
||||
; GCN-NEXT: .LBB5_4: ; %bb2
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[46:47]
|
||||
; GCN-NEXT: v_readlane_b32 s51, v40, 16
|
||||
; GCN-NEXT: v_readlane_b32 s50, v40, 15
|
||||
|
@ -1004,12 +1004,12 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
|
|||
; GISEL-NEXT: v_and_b32_e32 v2, 1, v2
|
||||
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
|
||||
; GISEL-NEXT: s_and_saveexec_b64 s[46:47], vcc
|
||||
; GISEL-NEXT: s_cbranch_execz BB5_4
|
||||
; GISEL-NEXT: s_cbranch_execz .LBB5_4
|
||||
; GISEL-NEXT: ; %bb.1: ; %bb1
|
||||
; GISEL-NEXT: v_writelane_b32 v40, s30, 17
|
||||
; GISEL-NEXT: v_writelane_b32 v40, s31, 18
|
||||
; GISEL-NEXT: s_mov_b64 s[48:49], exec
|
||||
; GISEL-NEXT: BB5_2: ; =>This Inner Loop Header: Depth=1
|
||||
; GISEL-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s16, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s17, v1
|
||||
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
|
||||
|
@ -1025,12 +1025,12 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
|
|||
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GISEL-NEXT: ; implicit-def: $vgpr31
|
||||
; GISEL-NEXT: s_xor_b64 exec, exec, s[50:51]
|
||||
; GISEL-NEXT: s_cbranch_execnz BB5_2
|
||||
; GISEL-NEXT: s_cbranch_execnz .LBB5_2
|
||||
; GISEL-NEXT: ; %bb.3:
|
||||
; GISEL-NEXT: s_mov_b64 exec, s[48:49]
|
||||
; GISEL-NEXT: v_readlane_b32 s30, v40, 17
|
||||
; GISEL-NEXT: v_readlane_b32 s31, v40, 18
|
||||
; GISEL-NEXT: BB5_4: ; %bb2
|
||||
; GISEL-NEXT: .LBB5_4: ; %bb2
|
||||
; GISEL-NEXT: s_or_b64 exec, exec, s[46:47]
|
||||
; GISEL-NEXT: v_readlane_b32 s51, v40, 16
|
||||
; GISEL-NEXT: v_readlane_b32 s50, v40, 15
|
||||
|
@ -1110,7 +1110,7 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) {
|
|||
; GCN-NEXT: s_mov_b64 s[6:7], s[30:31]
|
||||
; GCN-NEXT: s_mov_b64 s[8:9], exec
|
||||
; GCN-NEXT: s_movk_i32 s4, 0x7b
|
||||
; GCN-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: v_readfirstlane_b32 s12, v0
|
||||
; GCN-NEXT: v_readfirstlane_b32 s13, v1
|
||||
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[12:13], v[0:1]
|
||||
|
@ -1118,7 +1118,7 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) {
|
|||
; GCN-NEXT: s_swappc_b64 s[30:31], s[12:13]
|
||||
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GCN-NEXT: s_xor_b64 exec, exec, s[10:11]
|
||||
; GCN-NEXT: s_cbranch_execnz BB6_1
|
||||
; GCN-NEXT: s_cbranch_execnz .LBB6_1
|
||||
; GCN-NEXT: ; %bb.2:
|
||||
; GCN-NEXT: s_mov_b64 exec, s[8:9]
|
||||
; GCN-NEXT: v_readlane_b32 s63, v40, 29
|
||||
|
@ -1201,7 +1201,7 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) {
|
|||
; GISEL-NEXT: s_mov_b64 s[6:7], s[30:31]
|
||||
; GISEL-NEXT: s_movk_i32 s4, 0x7b
|
||||
; GISEL-NEXT: s_mov_b64 s[8:9], exec
|
||||
; GISEL-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s10, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s11, v1
|
||||
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1]
|
||||
|
@ -1209,7 +1209,7 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) {
|
|||
; GISEL-NEXT: s_swappc_b64 s[30:31], s[10:11]
|
||||
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GISEL-NEXT: s_xor_b64 exec, exec, s[12:13]
|
||||
; GISEL-NEXT: s_cbranch_execnz BB6_1
|
||||
; GISEL-NEXT: s_cbranch_execnz .LBB6_1
|
||||
; GISEL-NEXT: ; %bb.2:
|
||||
; GISEL-NEXT: s_mov_b64 exec, s[8:9]
|
||||
; GISEL-NEXT: v_readlane_b32 s63, v40, 29
|
||||
|
@ -1297,7 +1297,7 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr)
|
|||
; GCN-NEXT: s_mov_b64 s[4:5], s[30:31]
|
||||
; GCN-NEXT: v_mov_b32_e32 v41, v0
|
||||
; GCN-NEXT: s_mov_b64 s[6:7], exec
|
||||
; GCN-NEXT: BB7_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: v_readfirstlane_b32 s10, v1
|
||||
; GCN-NEXT: v_readfirstlane_b32 s11, v2
|
||||
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[1:2]
|
||||
|
@ -1306,7 +1306,7 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr)
|
|||
; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11]
|
||||
; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2
|
||||
; GCN-NEXT: s_xor_b64 exec, exec, s[8:9]
|
||||
; GCN-NEXT: s_cbranch_execnz BB7_1
|
||||
; GCN-NEXT: s_cbranch_execnz .LBB7_1
|
||||
; GCN-NEXT: ; %bb.2:
|
||||
; GCN-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, v41
|
||||
|
@ -1392,7 +1392,7 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr)
|
|||
; GISEL-NEXT: v_mov_b32_e32 v41, v0
|
||||
; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31]
|
||||
; GISEL-NEXT: s_mov_b64 s[6:7], exec
|
||||
; GISEL-NEXT: BB7_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GISEL-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s8, v1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s9, v2
|
||||
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
|
||||
|
@ -1401,7 +1401,7 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr)
|
|||
; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
|
||||
; GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2
|
||||
; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11]
|
||||
; GISEL-NEXT: s_cbranch_execnz BB7_1
|
||||
; GISEL-NEXT: s_cbranch_execnz .LBB7_1
|
||||
; GISEL-NEXT: ; %bb.2:
|
||||
; GISEL-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; GISEL-NEXT: v_mov_b32_e32 v0, v41
|
||||
|
@ -1493,7 +1493,7 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr)
|
|||
; GCN-NEXT: v_writelane_b32 v40, s63, 29
|
||||
; GCN-NEXT: s_mov_b64 s[4:5], s[30:31]
|
||||
; GCN-NEXT: s_mov_b64 s[6:7], exec
|
||||
; GCN-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: v_readfirstlane_b32 s10, v1
|
||||
; GCN-NEXT: v_readfirstlane_b32 s11, v2
|
||||
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[1:2]
|
||||
|
@ -1503,7 +1503,7 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr)
|
|||
; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2
|
||||
; GCN-NEXT: ; implicit-def: $vgpr0
|
||||
; GCN-NEXT: s_xor_b64 exec, exec, s[8:9]
|
||||
; GCN-NEXT: s_cbranch_execnz BB8_1
|
||||
; GCN-NEXT: s_cbranch_execnz .LBB8_1
|
||||
; GCN-NEXT: ; %bb.2:
|
||||
; GCN-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, v3
|
||||
|
@ -1586,7 +1586,7 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr)
|
|||
; GISEL-NEXT: v_writelane_b32 v40, s63, 29
|
||||
; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31]
|
||||
; GISEL-NEXT: s_mov_b64 s[6:7], exec
|
||||
; GISEL-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s8, v1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s9, v2
|
||||
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
|
||||
|
@ -1596,7 +1596,7 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr)
|
|||
; GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2
|
||||
; GISEL-NEXT: ; implicit-def: $vgpr0
|
||||
; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11]
|
||||
; GISEL-NEXT: s_cbranch_execnz BB8_1
|
||||
; GISEL-NEXT: s_cbranch_execnz .LBB8_1
|
||||
; GISEL-NEXT: ; %bb.2:
|
||||
; GISEL-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; GISEL-NEXT: v_mov_b32_e32 v0, v3
|
||||
|
@ -1684,7 +1684,7 @@ define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) {
|
|||
; GCN-NEXT: v_writelane_b32 v40, s63, 29
|
||||
; GCN-NEXT: s_mov_b64 s[4:5], s[30:31]
|
||||
; GCN-NEXT: s_mov_b64 s[6:7], exec
|
||||
; GCN-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: v_readfirstlane_b32 s10, v0
|
||||
; GCN-NEXT: v_readfirstlane_b32 s11, v1
|
||||
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1]
|
||||
|
@ -1692,7 +1692,7 @@ define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) {
|
|||
; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11]
|
||||
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GCN-NEXT: s_xor_b64 exec, exec, s[8:9]
|
||||
; GCN-NEXT: s_cbranch_execnz BB9_1
|
||||
; GCN-NEXT: s_cbranch_execnz .LBB9_1
|
||||
; GCN-NEXT: ; %bb.2:
|
||||
; GCN-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; GCN-NEXT: v_readlane_b32 s63, v40, 29
|
||||
|
@ -1774,7 +1774,7 @@ define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) {
|
|||
; GISEL-NEXT: v_writelane_b32 v40, s63, 29
|
||||
; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31]
|
||||
; GISEL-NEXT: s_mov_b64 s[6:7], exec
|
||||
; GISEL-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s8, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s9, v1
|
||||
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
|
||||
|
@ -1782,7 +1782,7 @@ define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) {
|
|||
; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
|
||||
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11]
|
||||
; GISEL-NEXT: s_cbranch_execnz BB9_1
|
||||
; GISEL-NEXT: s_cbranch_execnz .LBB9_1
|
||||
; GISEL-NEXT: ; %bb.2:
|
||||
; GISEL-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; GISEL-NEXT: v_readlane_b32 s63, v40, 29
|
||||
|
|
|
@ -10,12 +10,12 @@ define amdgpu_kernel void @infinite_loop(i32 addrspace(1)* %out) {
|
|||
; SI-NEXT: s_mov_b32 s3, 0xf000
|
||||
; SI-NEXT: s_mov_b32 s2, -1
|
||||
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
|
||||
; SI-NEXT: BB0_1: ; %loop
|
||||
; SI-NEXT: .LBB0_1: ; %loop
|
||||
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: s_branch BB0_1
|
||||
; SI-NEXT: s_branch .LBB0_1
|
||||
; IR-LABEL: @infinite_loop(
|
||||
; IR-NEXT: entry:
|
||||
; IR-NEXT: br label [[LOOP:%.*]]
|
||||
|
@ -35,21 +35,21 @@ define amdgpu_kernel void @infinite_loop_ret(i32 addrspace(1)* %out) {
|
|||
; SI: ; %bb.0: ; %entry
|
||||
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
|
||||
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; SI-NEXT: s_cbranch_execz BB1_3
|
||||
; SI-NEXT: s_cbranch_execz .LBB1_3
|
||||
; SI-NEXT: ; %bb.1: ; %loop.preheader
|
||||
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
|
||||
; SI-NEXT: s_mov_b32 s3, 0xf000
|
||||
; SI-NEXT: s_mov_b32 s2, -1
|
||||
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
|
||||
; SI-NEXT: s_and_b64 vcc, exec, -1
|
||||
; SI-NEXT: BB1_2: ; %loop
|
||||
; SI-NEXT: .LBB1_2: ; %loop
|
||||
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: s_mov_b64 vcc, vcc
|
||||
; SI-NEXT: s_cbranch_vccnz BB1_2
|
||||
; SI-NEXT: BB1_3: ; %UnifiedReturnBlock
|
||||
; SI-NEXT: s_cbranch_vccnz .LBB1_2
|
||||
; SI-NEXT: .LBB1_3: ; %UnifiedReturnBlock
|
||||
; SI-NEXT: s_endpgm
|
||||
; IR-LABEL: @infinite_loop_ret(
|
||||
; IR-NEXT: entry:
|
||||
|
@ -79,39 +79,39 @@ define amdgpu_kernel void @infinite_loops(i32 addrspace(1)* %out) {
|
|||
; SI: ; %bb.0: ; %entry
|
||||
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
|
||||
; SI-NEXT: s_mov_b64 s[2:3], -1
|
||||
; SI-NEXT: s_cbranch_scc1 BB2_4
|
||||
; SI-NEXT: s_cbranch_scc1 .LBB2_4
|
||||
; SI-NEXT: ; %bb.1:
|
||||
; SI-NEXT: s_mov_b32 s3, 0xf000
|
||||
; SI-NEXT: s_mov_b32 s2, -1
|
||||
; SI-NEXT: v_mov_b32_e32 v0, 0x378
|
||||
; SI-NEXT: s_and_b64 vcc, exec, -1
|
||||
; SI-NEXT: BB2_2: ; %loop2
|
||||
; SI-NEXT: .LBB2_2: ; %loop2
|
||||
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: s_mov_b64 vcc, vcc
|
||||
; SI-NEXT: s_cbranch_vccnz BB2_2
|
||||
; SI-NEXT: s_cbranch_vccnz .LBB2_2
|
||||
; SI-NEXT: ; %bb.3: ; %Flow
|
||||
; SI-NEXT: s_mov_b64 s[2:3], 0
|
||||
; SI-NEXT: BB2_4: ; %Flow2
|
||||
; SI-NEXT: .LBB2_4: ; %Flow2
|
||||
; SI-NEXT: s_and_b64 vcc, exec, s[2:3]
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_mov_b64 vcc, vcc
|
||||
; SI-NEXT: s_cbranch_vccz BB2_7
|
||||
; SI-NEXT: s_cbranch_vccz .LBB2_7
|
||||
; SI-NEXT: ; %bb.5:
|
||||
; SI-NEXT: s_mov_b32 s3, 0xf000
|
||||
; SI-NEXT: s_mov_b32 s2, -1
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
|
||||
; SI-NEXT: s_and_b64 vcc, exec, 0
|
||||
; SI-NEXT: BB2_6: ; %loop1
|
||||
; SI-NEXT: .LBB2_6: ; %loop1
|
||||
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: s_mov_b64 vcc, vcc
|
||||
; SI-NEXT: s_cbranch_vccz BB2_6
|
||||
; SI-NEXT: BB2_7: ; %DummyReturnBlock
|
||||
; SI-NEXT: s_cbranch_vccz .LBB2_6
|
||||
; SI-NEXT: .LBB2_7: ; %DummyReturnBlock
|
||||
; SI-NEXT: s_endpgm
|
||||
; IR-LABEL: @infinite_loops(
|
||||
; IR-NEXT: entry:
|
||||
|
@ -141,18 +141,18 @@ define amdgpu_kernel void @infinite_loop_nest_ret(i32 addrspace(1)* %out) {
|
|||
; SI: ; %bb.0: ; %entry
|
||||
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
|
||||
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; SI-NEXT: s_cbranch_execz BB3_5
|
||||
; SI-NEXT: s_cbranch_execz .LBB3_5
|
||||
; SI-NEXT: ; %bb.1: ; %outer_loop.preheader
|
||||
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
|
||||
; SI-NEXT: v_cmp_ne_u32_e64 s[0:1], 3, v0
|
||||
; SI-NEXT: s_mov_b32 s7, 0xf000
|
||||
; SI-NEXT: s_mov_b32 s6, -1
|
||||
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
|
||||
; SI-NEXT: BB3_2: ; %outer_loop
|
||||
; SI-NEXT: .LBB3_2: ; %outer_loop
|
||||
; SI-NEXT: ; =>This Loop Header: Depth=1
|
||||
; SI-NEXT: ; Child Loop BB3_3 Depth 2
|
||||
; SI-NEXT: s_mov_b64 s[2:3], 0
|
||||
; SI-NEXT: BB3_3: ; %inner_loop
|
||||
; SI-NEXT: .LBB3_3: ; %inner_loop
|
||||
; SI-NEXT: ; Parent Loop BB3_2 Depth=1
|
||||
; SI-NEXT: ; => This Inner Loop Header: Depth=2
|
||||
; SI-NEXT: s_and_b64 s[8:9], exec, s[0:1]
|
||||
|
@ -161,13 +161,13 @@ define amdgpu_kernel void @infinite_loop_nest_ret(i32 addrspace(1)* %out) {
|
|||
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: s_cbranch_execnz BB3_3
|
||||
; SI-NEXT: s_cbranch_execnz .LBB3_3
|
||||
; SI-NEXT: ; %bb.4: ; %loop.exit.guard
|
||||
; SI-NEXT: ; in Loop: Header=BB3_2 Depth=1
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: s_mov_b64 vcc, 0
|
||||
; SI-NEXT: s_branch BB3_2
|
||||
; SI-NEXT: BB3_5: ; %UnifiedReturnBlock
|
||||
; SI-NEXT: s_branch .LBB3_2
|
||||
; SI-NEXT: .LBB3_5: ; %UnifiedReturnBlock
|
||||
; SI-NEXT: s_endpgm
|
||||
; IR-LABEL: @infinite_loop_nest_ret(
|
||||
; IR-NEXT: entry:
|
||||
|
|
|
@ -1534,19 +1534,19 @@ define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 add
|
|||
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_cmp_lg_u32 s6, 0
|
||||
; SI-NEXT: s_cbranch_scc0 BB30_2
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB30_2
|
||||
; SI-NEXT: ; %bb.1: ; %else
|
||||
; SI-NEXT: s_load_dword s7, s[2:3], 0x1
|
||||
; SI-NEXT: s_mov_b64 s[4:5], 0
|
||||
; SI-NEXT: s_andn2_b64 vcc, exec, s[4:5]
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_mov_b64 vcc, vcc
|
||||
; SI-NEXT: s_cbranch_vccz BB30_3
|
||||
; SI-NEXT: s_branch BB30_4
|
||||
; SI-NEXT: BB30_2:
|
||||
; SI-NEXT: BB30_3: ; %if
|
||||
; SI-NEXT: s_cbranch_vccz .LBB30_3
|
||||
; SI-NEXT: s_branch .LBB30_4
|
||||
; SI-NEXT: .LBB30_2:
|
||||
; SI-NEXT: .LBB30_3: ; %if
|
||||
; SI-NEXT: s_load_dword s7, s[2:3], 0x0
|
||||
; SI-NEXT: BB30_4: ; %endif
|
||||
; SI-NEXT: .LBB30_4: ; %endif
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: v_mov_b32_e32 v0, s6
|
||||
; SI-NEXT: s_mov_b32 s3, 0x100f000
|
||||
|
@ -1561,16 +1561,16 @@ define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 add
|
|||
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
||||
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; VI-NEXT: s_cmp_lg_u32 s6, 0
|
||||
; VI-NEXT: s_cbranch_scc0 BB30_2
|
||||
; VI-NEXT: s_cbranch_scc0 .LBB30_2
|
||||
; VI-NEXT: ; %bb.1: ; %else
|
||||
; VI-NEXT: s_load_dword s7, s[2:3], 0x4
|
||||
; VI-NEXT: s_cbranch_execz BB30_3
|
||||
; VI-NEXT: s_branch BB30_4
|
||||
; VI-NEXT: BB30_2:
|
||||
; VI-NEXT: BB30_3: ; %if
|
||||
; VI-NEXT: s_cbranch_execz .LBB30_3
|
||||
; VI-NEXT: s_branch .LBB30_4
|
||||
; VI-NEXT: .LBB30_2:
|
||||
; VI-NEXT: .LBB30_3: ; %if
|
||||
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; VI-NEXT: s_load_dword s7, s[2:3], 0x0
|
||||
; VI-NEXT: BB30_4: ; %endif
|
||||
; VI-NEXT: .LBB30_4: ; %endif
|
||||
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; VI-NEXT: v_mov_b32_e32 v0, s6
|
||||
; VI-NEXT: s_mov_b32 s3, 0x1100f000
|
||||
|
|
|
@ -17,27 +17,27 @@ define amdgpu_ps void @return_void(float %0) #0 {
|
|||
; CHECK-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||
; CHECK-NEXT: s_cbranch_execz BB0_3
|
||||
; CHECK-NEXT: BB0_1: ; %loop
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB0_3
|
||||
; CHECK-NEXT: .LBB0_1: ; %loop
|
||||
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; CHECK-NEXT: s_cbranch_scc0 BB0_6
|
||||
; CHECK-NEXT: s_cbranch_scc0 .LBB0_6
|
||||
; CHECK-NEXT: ; %bb.2: ; %loop
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: s_mov_b64 exec, 0
|
||||
; CHECK-NEXT: s_mov_b64 vcc, 0
|
||||
; CHECK-NEXT: s_branch BB0_1
|
||||
; CHECK-NEXT: BB0_3: ; %Flow1
|
||||
; CHECK-NEXT: s_branch .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_3: ; %Flow1
|
||||
; CHECK-NEXT: s_or_saveexec_b64 s[0:1], s[2:3]
|
||||
; CHECK-NEXT: s_xor_b64 exec, exec, s[0:1]
|
||||
; CHECK-NEXT: s_cbranch_execz BB0_5
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB0_5
|
||||
; CHECK-NEXT: ; %bb.4: ; %end
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 1.0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: exp mrt0 v1, v1, v1, v0 done vm
|
||||
; CHECK-NEXT: BB0_5: ; %UnifiedReturnBlock
|
||||
; CHECK-NEXT: .LBB0_5: ; %UnifiedReturnBlock
|
||||
; CHECK-NEXT: s_endpgm
|
||||
; CHECK-NEXT: BB0_6:
|
||||
; CHECK-NEXT: .LBB0_6:
|
||||
; CHECK-NEXT: s_mov_b64 exec, 0
|
||||
; CHECK-NEXT: exp null off, off, off, off done vm
|
||||
; CHECK-NEXT: s_endpgm
|
||||
|
@ -62,26 +62,26 @@ define amdgpu_ps void @return_void_compr(float %0) #0 {
|
|||
; CHECK-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||
; CHECK-NEXT: s_cbranch_execz BB1_3
|
||||
; CHECK-NEXT: BB1_1: ; %loop
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB1_3
|
||||
; CHECK-NEXT: .LBB1_1: ; %loop
|
||||
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; CHECK-NEXT: s_cbranch_scc0 BB1_6
|
||||
; CHECK-NEXT: s_cbranch_scc0 .LBB1_6
|
||||
; CHECK-NEXT: ; %bb.2: ; %loop
|
||||
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
|
||||
; CHECK-NEXT: s_mov_b64 exec, 0
|
||||
; CHECK-NEXT: s_mov_b64 vcc, 0
|
||||
; CHECK-NEXT: s_branch BB1_1
|
||||
; CHECK-NEXT: BB1_3: ; %Flow1
|
||||
; CHECK-NEXT: s_branch .LBB1_1
|
||||
; CHECK-NEXT: .LBB1_3: ; %Flow1
|
||||
; CHECK-NEXT: s_or_saveexec_b64 s[0:1], s[2:3]
|
||||
; CHECK-NEXT: s_xor_b64 exec, exec, s[0:1]
|
||||
; CHECK-NEXT: s_cbranch_execz BB1_5
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB1_5
|
||||
; CHECK-NEXT: ; %bb.4: ; %end
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: exp mrt0 v0, off, v0, off done compr vm
|
||||
; CHECK-NEXT: BB1_5: ; %UnifiedReturnBlock
|
||||
; CHECK-NEXT: .LBB1_5: ; %UnifiedReturnBlock
|
||||
; CHECK-NEXT: s_endpgm
|
||||
; CHECK-NEXT: BB1_6:
|
||||
; CHECK-NEXT: .LBB1_6:
|
||||
; CHECK-NEXT: s_mov_b64 exec, 0
|
||||
; CHECK-NEXT: exp null off, off, off, off done vm
|
||||
; CHECK-NEXT: s_endpgm
|
||||
|
@ -103,15 +103,15 @@ define amdgpu_ps void @only_kill() #0 {
|
|||
; CHECK-LABEL: only_kill:
|
||||
; CHECK: ; %bb.0: ; %main_body
|
||||
; CHECK-NEXT: s_mov_b64 s[0:1], exec
|
||||
; CHECK-NEXT: BB2_1: ; %loop
|
||||
; CHECK-NEXT: .LBB2_1: ; %loop
|
||||
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; CHECK-NEXT: s_cbranch_scc0 BB2_3
|
||||
; CHECK-NEXT: s_cbranch_scc0 .LBB2_3
|
||||
; CHECK-NEXT: ; %bb.2: ; %loop
|
||||
; CHECK-NEXT: ; in Loop: Header=BB2_1 Depth=1
|
||||
; CHECK-NEXT: s_mov_b64 exec, 0
|
||||
; CHECK-NEXT: s_branch BB2_1
|
||||
; CHECK-NEXT: BB2_3:
|
||||
; CHECK-NEXT: s_branch .LBB2_1
|
||||
; CHECK-NEXT: .LBB2_3:
|
||||
; CHECK-NEXT: s_mov_b64 exec, 0
|
||||
; CHECK-NEXT: exp null off, off, off, off done vm
|
||||
; CHECK-NEXT: s_endpgm
|
||||
|
@ -132,25 +132,25 @@ define amdgpu_ps float @return_nonvoid(float %0) #0 {
|
|||
; CHECK-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||
; CHECK-NEXT: s_cbranch_execz BB3_3
|
||||
; CHECK-NEXT: BB3_1: ; %loop
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB3_3
|
||||
; CHECK-NEXT: .LBB3_1: ; %loop
|
||||
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; CHECK-NEXT: s_cbranch_scc0 BB3_4
|
||||
; CHECK-NEXT: s_cbranch_scc0 .LBB3_4
|
||||
; CHECK-NEXT: ; %bb.2: ; %loop
|
||||
; CHECK-NEXT: ; in Loop: Header=BB3_1 Depth=1
|
||||
; CHECK-NEXT: s_mov_b64 exec, 0
|
||||
; CHECK-NEXT: s_mov_b64 vcc, exec
|
||||
; CHECK-NEXT: s_cbranch_execnz BB3_1
|
||||
; CHECK-NEXT: BB3_3: ; %Flow1
|
||||
; CHECK-NEXT: s_cbranch_execnz .LBB3_1
|
||||
; CHECK-NEXT: .LBB3_3: ; %Flow1
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: s_branch BB3_5
|
||||
; CHECK-NEXT: BB3_4:
|
||||
; CHECK-NEXT: s_branch .LBB3_5
|
||||
; CHECK-NEXT: .LBB3_4:
|
||||
; CHECK-NEXT: s_mov_b64 exec, 0
|
||||
; CHECK-NEXT: exp null off, off, off, off done vm
|
||||
; CHECK-NEXT: s_endpgm
|
||||
; CHECK-NEXT: BB3_5:
|
||||
; CHECK-NEXT: .LBB3_5:
|
||||
main_body:
|
||||
%cmp = fcmp olt float %0, 1.000000e+01
|
||||
br i1 %cmp, label %end, label %loop
|
||||
|
|
|
@ -3,20 +3,20 @@
|
|||
; Make sure that m0 is not reinitialized in the loop.
|
||||
|
||||
; GCN-LABEL: {{^}}copy_local_to_global_loop_m0_init:
|
||||
; GCN: s_cbranch_scc1 BB0_3
|
||||
; GCN: s_cbranch_scc1 .LBB0_3
|
||||
|
||||
; Initialize in preheader
|
||||
; GCN: s_mov_b32 m0, -1
|
||||
|
||||
; GCN: BB0_2:
|
||||
; GCN: .LBB0_2:
|
||||
; GCN-NOT: m0
|
||||
; GCN: ds_read_b32
|
||||
; GCN-NOT: m0
|
||||
; GCN: buffer_store_dword
|
||||
|
||||
; GCN: s_cbranch_scc0 BB0_2
|
||||
; GCN: s_cbranch_scc0 .LBB0_2
|
||||
|
||||
; GCN: BB0_3:
|
||||
; GCN: .LBB0_3:
|
||||
; GCN-NEXT: s_endpgm
|
||||
define amdgpu_kernel void @copy_local_to_global_loop_m0_init(i32 addrspace(1)* noalias nocapture %out, i32 addrspace(3)* noalias nocapture readonly %in, i32 %n) #0 {
|
||||
bb:
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
; NOLOOP: ds_gws_barrier v0 gds{{$}}
|
||||
|
||||
; LOOP: s_mov_b32 m0, 0{{$}}
|
||||
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; LOOP: [[LOOP:.LBB[0-9]+_[0-9]+]]:
|
||||
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
|
||||
; LOOP-NEXT: ds_gws_barrier v0 gds
|
||||
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
; GCN: v_mov_b32_e32 v0, [[BAR_NUM]]
|
||||
; NOLOOP: ds_gws_init v0 gds{{$}}
|
||||
|
||||
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; LOOP: [[LOOP:.LBB[0-9]+_[0-9]+]]:
|
||||
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
|
||||
; LOOP-NEXT: ds_gws_init v0 gds
|
||||
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
|
@ -32,7 +32,7 @@ define amdgpu_kernel void @gws_init_offset0(i32 %val) #0 {
|
|||
|
||||
|
||||
; LOOP: s_mov_b32 m0, 0{{$}}
|
||||
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; LOOP: [[LOOP:.LBB[0-9]+_[0-9]+]]:
|
||||
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
|
||||
; LOOP-NEXT: ds_gws_init v0 offset:63 gds
|
||||
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
; NOLOOP: ds_gws_sema_br v0 gds{{$}}
|
||||
|
||||
; LOOP: s_mov_b32 m0, 0{{$}}
|
||||
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; LOOP: [[LOOP:.LBB[0-9]+_[0-9]+]]:
|
||||
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
|
||||
; LOOP-NEXT: ds_gws_sema_br v0 gds
|
||||
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
; NOLOOP: ds_gws_sema_p gds{{$}}
|
||||
|
||||
; LOOP: s_mov_b32 m0, 0{{$}}
|
||||
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; LOOP: [[LOOP:.LBB[0-9]+_[0-9]+]]:
|
||||
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
|
||||
; LOOP-NEXT: ds_gws_sema_p gds
|
||||
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
; NOLOOP: ds_gws_sema_release_all gds{{$}}
|
||||
|
||||
; LOOP: s_mov_b32 m0, 0{{$}}
|
||||
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; LOOP: [[LOOP:.LBB[0-9]+_[0-9]+]]:
|
||||
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
|
||||
; LOOP-NEXT: ds_gws_sema_release_all gds
|
||||
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
; NOLOOP: ds_gws_sema_v gds{{$}}
|
||||
|
||||
; LOOP: s_mov_b32 m0, 0{{$}}
|
||||
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; LOOP: [[LOOP:.LBB[0-9]+_[0-9]+]]:
|
||||
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
|
||||
; LOOP-NEXT: ds_gws_sema_v gds
|
||||
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
|
|
|
@ -18,7 +18,7 @@ define amdgpu_cs float @ds_ordered_swap(i32 addrspace(2)* inreg %gds, i32 %value
|
|||
; GCN: v_cmp_ne_u32_e32 vcc, 0, v[[VALUE:[0-9]+]]
|
||||
; GCN: s_and_saveexec_b64 s[[SAVED:\[[0-9]+:[0-9]+\]]], vcc
|
||||
; // We have to use s_cbranch, because ds_ordered_count has side effects with EXEC=0
|
||||
; GCN: s_cbranch_execz [[BB:BB._.]]
|
||||
; GCN: s_cbranch_execz [[BB:.LBB._.]]
|
||||
; GCN: s_mov_b32 m0, s0
|
||||
; VIGFX9-NEXT: s_nop 0
|
||||
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[VALUE]] offset:4868 gds
|
||||
|
|
|
@ -58,19 +58,19 @@ define amdgpu_kernel void @set_inactive_scc(i32 addrspace(1)* %out, i32 %in, <4
|
|||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_cmp_lg_u32 s0, 56
|
||||
; GCN-NEXT: s_mov_b64 s[0:1], -1
|
||||
; GCN-NEXT: s_cbranch_scc1 BB2_3
|
||||
; GCN-NEXT: s_cbranch_scc1 .LBB2_3
|
||||
; GCN-NEXT: ; %bb.1: ; %Flow
|
||||
; GCN-NEXT: s_andn2_b64 vcc, exec, s[0:1]
|
||||
; GCN-NEXT: s_cbranch_vccz BB2_4
|
||||
; GCN-NEXT: BB2_2: ; %.exit
|
||||
; GCN-NEXT: s_cbranch_vccz .LBB2_4
|
||||
; GCN-NEXT: .LBB2_2: ; %.exit
|
||||
; GCN-NEXT: s_endpgm
|
||||
; GCN-NEXT: BB2_3: ; %.one
|
||||
; GCN-NEXT: .LBB2_3: ; %.one
|
||||
; GCN-NEXT: v_add_u32_e32 v1, vcc, 1, v0
|
||||
; GCN-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GCN-NEXT: s_mov_b32 s6, -1
|
||||
; GCN-NEXT: buffer_store_dword v1, off, s[4:7], 0
|
||||
; GCN-NEXT: s_cbranch_execnz BB2_2
|
||||
; GCN-NEXT: BB2_4: ; %.zero
|
||||
; GCN-NEXT: s_cbranch_execnz .LBB2_2
|
||||
; GCN-NEXT: .LBB2_4: ; %.zero
|
||||
; GCN-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GCN-NEXT: s_mov_b32 s6, -1
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
|
|
|
@ -8,7 +8,7 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) {
|
|||
; GFX10-LABEL: main:
|
||||
; GFX10: ; %bb.0: ; %bb
|
||||
; GFX10-NEXT: s_mov_b32 s1, exec_lo
|
||||
; GFX10-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GFX10-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GFX10-NEXT: v_readfirstlane_b32 s6, v2
|
||||
|
@ -22,7 +22,7 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) {
|
|||
; GFX10-NEXT: ; implicit-def: $vgpr4
|
||||
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
||||
; GFX10-NEXT: s_cbranch_execnz BB0_1
|
||||
; GFX10-NEXT: s_cbranch_execnz .LBB0_1
|
||||
; GFX10-NEXT: ; %bb.2:
|
||||
; GFX10-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -35,7 +35,7 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) {
|
|||
; GFX9-LABEL: main:
|
||||
; GFX9: ; %bb.0: ; %bb
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], exec
|
||||
; GFX9-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX9-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s6, v2
|
||||
|
@ -49,7 +49,7 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) {
|
|||
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; GFX9-NEXT: ; implicit-def: $vgpr4
|
||||
; GFX9-NEXT: s_xor_b64 exec, exec, s[0:1]
|
||||
; GFX9-NEXT: s_cbranch_execnz BB0_1
|
||||
; GFX9-NEXT: s_cbranch_execnz .LBB0_1
|
||||
; GFX9-NEXT: ; %bb.2:
|
||||
; GFX9-NEXT: s_mov_b64 exec, s[2:3]
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -61,7 +61,7 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) {
|
|||
; GFX8-LABEL: main:
|
||||
; GFX8: ; %bb.0: ; %bb
|
||||
; GFX8-NEXT: s_mov_b64 s[2:3], exec
|
||||
; GFX8-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX8-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GFX8-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GFX8-NEXT: v_readfirstlane_b32 s6, v2
|
||||
|
@ -75,7 +75,7 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) {
|
|||
; GFX8-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; GFX8-NEXT: ; implicit-def: $vgpr4
|
||||
; GFX8-NEXT: s_xor_b64 exec, exec, s[0:1]
|
||||
; GFX8-NEXT: s_cbranch_execnz BB0_1
|
||||
; GFX8-NEXT: s_cbranch_execnz .LBB0_1
|
||||
; GFX8-NEXT: ; %bb.2:
|
||||
; GFX8-NEXT: s_mov_b64 exec, s[2:3]
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
||||
|
|
|
@ -9,13 +9,13 @@ define amdgpu_ps void @static_exact(float %arg0, float %arg1) {
|
|||
; SI: ; %bb.0: ; %.entry
|
||||
; SI-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
|
||||
; SI-NEXT: s_andn2_b64 exec, exec, exec
|
||||
; SI-NEXT: s_cbranch_scc0 BB0_2
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB0_2
|
||||
; SI-NEXT: ; %bb.1: ; %.entry
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
|
||||
; SI-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
; SI-NEXT: BB0_2:
|
||||
; SI-NEXT: .LBB0_2:
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: exp null off, off, off, off done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
|
@ -24,13 +24,13 @@ define amdgpu_ps void @static_exact(float %arg0, float %arg1) {
|
|||
; GFX9: ; %bb.0: ; %.entry
|
||||
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: s_andn2_b64 exec, exec, exec
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB0_2
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB0_2
|
||||
; GFX9-NEXT: ; %bb.1: ; %.entry
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
|
||||
; GFX9-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
; GFX9-NEXT: BB0_2:
|
||||
; GFX9-NEXT: .LBB0_2:
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: exp null off, off, off, off done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
|
@ -39,13 +39,13 @@ define amdgpu_ps void @static_exact(float %arg0, float %arg1) {
|
|||
; GFX10-32: ; %bb.0: ; %.entry
|
||||
; GFX10-32-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v0
|
||||
; GFX10-32-NEXT: s_andn2_b32 exec_lo, exec_lo, exec_lo
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 BB0_2
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 .LBB0_2
|
||||
; GFX10-32-NEXT: ; %bb.1: ; %.entry
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc_lo
|
||||
; GFX10-32-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
; GFX10-32-NEXT: BB0_2:
|
||||
; GFX10-32-NEXT: .LBB0_2:
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
|
@ -54,13 +54,13 @@ define amdgpu_ps void @static_exact(float %arg0, float %arg1) {
|
|||
; GFX10-64: ; %bb.0: ; %.entry
|
||||
; GFX10-64-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
|
||||
; GFX10-64-NEXT: s_andn2_b64 exec, exec, exec
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 BB0_2
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB0_2
|
||||
; GFX10-64-NEXT: ; %bb.1: ; %.entry
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
|
||||
; GFX10-64-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
; GFX10-64-NEXT: BB0_2:
|
||||
; GFX10-64-NEXT: .LBB0_2:
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
|
@ -81,13 +81,13 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
|
|||
; SI-NEXT: s_xor_b64 s[0:1], s[0:1], exec
|
||||
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
|
||||
; SI-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
|
||||
; SI-NEXT: s_cbranch_scc0 BB1_2
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB1_2
|
||||
; SI-NEXT: ; %bb.1: ; %.entry
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
|
||||
; SI-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
; SI-NEXT: BB1_2:
|
||||
; SI-NEXT: .LBB1_2:
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: exp null off, off, off, off done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
|
@ -99,13 +99,13 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
|
|||
; GFX9-NEXT: s_xor_b64 s[0:1], s[0:1], exec
|
||||
; GFX9-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
|
||||
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB1_2
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB1_2
|
||||
; GFX9-NEXT: ; %bb.1: ; %.entry
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[2:3]
|
||||
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
|
||||
; GFX9-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
; GFX9-NEXT: BB1_2:
|
||||
; GFX9-NEXT: .LBB1_2:
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: exp null off, off, off, off done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
|
@ -117,13 +117,13 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
|
|||
; GFX10-32-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v0
|
||||
; GFX10-32-NEXT: s_xor_b32 s0, s0, exec_lo
|
||||
; GFX10-32-NEXT: s_andn2_b32 s1, s1, s0
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 BB1_2
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 .LBB1_2
|
||||
; GFX10-32-NEXT: ; %bb.1: ; %.entry
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s1
|
||||
; GFX10-32-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc_lo
|
||||
; GFX10-32-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
; GFX10-32-NEXT: BB1_2:
|
||||
; GFX10-32-NEXT: .LBB1_2:
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
|
@ -135,13 +135,13 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
|
|||
; GFX10-64-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
|
||||
; GFX10-64-NEXT: s_xor_b64 s[0:1], s[0:1], exec
|
||||
; GFX10-64-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 BB1_2
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB1_2
|
||||
; GFX10-64-NEXT: ; %bb.1: ; %.entry
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[2:3]
|
||||
; GFX10-64-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
|
||||
; GFX10-64-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
; GFX10-64-NEXT: BB1_2:
|
||||
; GFX10-64-NEXT: .LBB1_2:
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
|
@ -167,18 +167,18 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
|
|||
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v0
|
||||
; SI-NEXT: s_and_saveexec_b64 s[4:5], s[0:1]
|
||||
; SI-NEXT: s_xor_b64 s[0:1], exec, s[4:5]
|
||||
; SI-NEXT: s_cbranch_execz BB2_3
|
||||
; SI-NEXT: s_cbranch_execz .LBB2_3
|
||||
; SI-NEXT: ; %bb.1: ; %.demote
|
||||
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
|
||||
; SI-NEXT: s_cbranch_scc0 BB2_4
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB2_4
|
||||
; SI-NEXT: ; %bb.2: ; %.demote
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: BB2_3: ; %.continue
|
||||
; SI-NEXT: .LBB2_3: ; %.continue
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
|
||||
; SI-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
; SI-NEXT: BB2_4:
|
||||
; SI-NEXT: .LBB2_4:
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: exp null off, off, off, off done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
|
@ -195,18 +195,18 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
|
|||
; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v0
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], s[0:1]
|
||||
; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_execz BB2_3
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB2_3
|
||||
; GFX9-NEXT: ; %bb.1: ; %.demote
|
||||
; GFX9-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB2_4
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB2_4
|
||||
; GFX9-NEXT: ; %bb.2: ; %.demote
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: BB2_3: ; %.continue
|
||||
; GFX9-NEXT: .LBB2_3: ; %.continue
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
|
||||
; GFX9-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
; GFX9-NEXT: BB2_4:
|
||||
; GFX9-NEXT: .LBB2_4:
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: exp null off, off, off, off done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
|
@ -223,18 +223,18 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
|
|||
; GFX10-32-NEXT: v_cmp_eq_u32_e64 s0, 1, v0
|
||||
; GFX10-32-NEXT: s_and_saveexec_b32 s2, s0
|
||||
; GFX10-32-NEXT: s_xor_b32 s0, exec_lo, s2
|
||||
; GFX10-32-NEXT: s_cbranch_execz BB2_3
|
||||
; GFX10-32-NEXT: s_cbranch_execz .LBB2_3
|
||||
; GFX10-32-NEXT: ; %bb.1: ; %.demote
|
||||
; GFX10-32-NEXT: s_andn2_b32 s1, s1, exec_lo
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 BB2_4
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 .LBB2_4
|
||||
; GFX10-32-NEXT: ; %bb.2: ; %.demote
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: BB2_3: ; %.continue
|
||||
; GFX10-32-NEXT: .LBB2_3: ; %.continue
|
||||
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s0
|
||||
; GFX10-32-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc_lo
|
||||
; GFX10-32-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
; GFX10-32-NEXT: BB2_4:
|
||||
; GFX10-32-NEXT: .LBB2_4:
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
|
@ -251,18 +251,18 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
|
|||
; GFX10-64-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v0
|
||||
; GFX10-64-NEXT: s_and_saveexec_b64 s[4:5], s[0:1]
|
||||
; GFX10-64-NEXT: s_xor_b64 s[0:1], exec, s[4:5]
|
||||
; GFX10-64-NEXT: s_cbranch_execz BB2_3
|
||||
; GFX10-64-NEXT: s_cbranch_execz .LBB2_3
|
||||
; GFX10-64-NEXT: ; %bb.1: ; %.demote
|
||||
; GFX10-64-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 BB2_4
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB2_4
|
||||
; GFX10-64-NEXT: ; %bb.2: ; %.demote
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: BB2_3: ; %.continue
|
||||
; GFX10-64-NEXT: .LBB2_3: ; %.continue
|
||||
; GFX10-64-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX10-64-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
|
||||
; GFX10-64-NEXT: exp mrt1 v0, v0, v0, v0 done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
; GFX10-64-NEXT: BB2_4:
|
||||
; GFX10-64-NEXT: .LBB2_4:
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
|
@ -293,14 +293,14 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
|
||||
; SI-NEXT: s_and_saveexec_b64 s[14:15], vcc
|
||||
; SI-NEXT: s_xor_b64 s[14:15], exec, s[14:15]
|
||||
; SI-NEXT: s_cbranch_execz BB3_3
|
||||
; SI-NEXT: s_cbranch_execz .LBB3_3
|
||||
; SI-NEXT: ; %bb.1: ; %.demote
|
||||
; SI-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
|
||||
; SI-NEXT: s_cbranch_scc0 BB3_4
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB3_4
|
||||
; SI-NEXT: ; %bb.2: ; %.demote
|
||||
; SI-NEXT: s_wqm_b64 s[16:17], s[12:13]
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[16:17]
|
||||
; SI-NEXT: BB3_3: ; %.continue
|
||||
; SI-NEXT: .LBB3_3: ; %.continue
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[14:15]
|
||||
; SI-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -308,12 +308,12 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; SI-NEXT: s_and_b64 exec, exec, s[12:13]
|
||||
; SI-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: s_branch BB3_5
|
||||
; SI-NEXT: BB3_4:
|
||||
; SI-NEXT: s_branch .LBB3_5
|
||||
; SI-NEXT: .LBB3_4:
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: exp null off, off, off, off done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
; SI-NEXT: BB3_5:
|
||||
; SI-NEXT: .LBB3_5:
|
||||
;
|
||||
; GFX9-LABEL: wqm_demote_1:
|
||||
; GFX9: ; %bb.0: ; %.entry
|
||||
|
@ -322,14 +322,14 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[14:15], vcc
|
||||
; GFX9-NEXT: s_xor_b64 s[14:15], exec, s[14:15]
|
||||
; GFX9-NEXT: s_cbranch_execz BB3_3
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB3_3
|
||||
; GFX9-NEXT: ; %bb.1: ; %.demote
|
||||
; GFX9-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB3_4
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB3_4
|
||||
; GFX9-NEXT: ; %bb.2: ; %.demote
|
||||
; GFX9-NEXT: s_wqm_b64 s[16:17], s[12:13]
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[16:17]
|
||||
; GFX9-NEXT: BB3_3: ; %.continue
|
||||
; GFX9-NEXT: .LBB3_3: ; %.continue
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[14:15]
|
||||
; GFX9-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -337,12 +337,12 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
|
||||
; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_branch BB3_5
|
||||
; GFX9-NEXT: BB3_4:
|
||||
; GFX9-NEXT: s_branch .LBB3_5
|
||||
; GFX9-NEXT: .LBB3_4:
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: exp null off, off, off, off done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
; GFX9-NEXT: BB3_5:
|
||||
; GFX9-NEXT: .LBB3_5:
|
||||
;
|
||||
; GFX10-32-LABEL: wqm_demote_1:
|
||||
; GFX10-32: ; %bb.0: ; %.entry
|
||||
|
@ -351,14 +351,14 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; GFX10-32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v1
|
||||
; GFX10-32-NEXT: s_and_saveexec_b32 s13, vcc_lo
|
||||
; GFX10-32-NEXT: s_xor_b32 s13, exec_lo, s13
|
||||
; GFX10-32-NEXT: s_cbranch_execz BB3_3
|
||||
; GFX10-32-NEXT: s_cbranch_execz .LBB3_3
|
||||
; GFX10-32-NEXT: ; %bb.1: ; %.demote
|
||||
; GFX10-32-NEXT: s_andn2_b32 s12, s12, exec_lo
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 BB3_4
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 .LBB3_4
|
||||
; GFX10-32-NEXT: ; %bb.2: ; %.demote
|
||||
; GFX10-32-NEXT: s_wqm_b32 s14, s12
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s14
|
||||
; GFX10-32-NEXT: BB3_3: ; %.continue
|
||||
; GFX10-32-NEXT: .LBB3_3: ; %.continue
|
||||
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s13
|
||||
; GFX10-32-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
|
||||
; GFX10-32-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -366,12 +366,12 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
||||
; GFX10-32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
||||
; GFX10-32-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-32-NEXT: s_branch BB3_5
|
||||
; GFX10-32-NEXT: BB3_4:
|
||||
; GFX10-32-NEXT: s_branch .LBB3_5
|
||||
; GFX10-32-NEXT: .LBB3_4:
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
; GFX10-32-NEXT: BB3_5:
|
||||
; GFX10-32-NEXT: .LBB3_5:
|
||||
;
|
||||
; GFX10-64-LABEL: wqm_demote_1:
|
||||
; GFX10-64: ; %bb.0: ; %.entry
|
||||
|
@ -380,14 +380,14 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; GFX10-64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
|
||||
; GFX10-64-NEXT: s_and_saveexec_b64 s[14:15], vcc
|
||||
; GFX10-64-NEXT: s_xor_b64 s[14:15], exec, s[14:15]
|
||||
; GFX10-64-NEXT: s_cbranch_execz BB3_3
|
||||
; GFX10-64-NEXT: s_cbranch_execz .LBB3_3
|
||||
; GFX10-64-NEXT: ; %bb.1: ; %.demote
|
||||
; GFX10-64-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 BB3_4
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB3_4
|
||||
; GFX10-64-NEXT: ; %bb.2: ; %.demote
|
||||
; GFX10-64-NEXT: s_wqm_b64 s[16:17], s[12:13]
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[16:17]
|
||||
; GFX10-64-NEXT: BB3_3: ; %.continue
|
||||
; GFX10-64-NEXT: .LBB3_3: ; %.continue
|
||||
; GFX10-64-NEXT: s_or_b64 exec, exec, s[14:15]
|
||||
; GFX10-64-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
|
||||
; GFX10-64-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -395,12 +395,12 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[12:13]
|
||||
; GFX10-64-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
||||
; GFX10-64-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-64-NEXT: s_branch BB3_5
|
||||
; GFX10-64-NEXT: BB3_4:
|
||||
; GFX10-64-NEXT: s_branch .LBB3_5
|
||||
; GFX10-64-NEXT: .LBB3_4:
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
; GFX10-64-NEXT: BB3_5:
|
||||
; GFX10-64-NEXT: .LBB3_5:
|
||||
.entry:
|
||||
%z.cmp = fcmp olt float %z, 0.0
|
||||
br i1 %z.cmp, label %.continue, label %.demote
|
||||
|
@ -429,25 +429,25 @@ define amdgpu_ps <4 x float> @wqm_demote_2(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
||||
; SI-NEXT: s_and_saveexec_b64 s[14:15], vcc
|
||||
; SI-NEXT: s_xor_b64 s[14:15], exec, s[14:15]
|
||||
; SI-NEXT: s_cbranch_execz BB4_3
|
||||
; SI-NEXT: s_cbranch_execz .LBB4_3
|
||||
; SI-NEXT: ; %bb.1: ; %.demote
|
||||
; SI-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
|
||||
; SI-NEXT: s_cbranch_scc0 BB4_4
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB4_4
|
||||
; SI-NEXT: ; %bb.2: ; %.demote
|
||||
; SI-NEXT: s_wqm_b64 s[16:17], s[12:13]
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[16:17]
|
||||
; SI-NEXT: BB4_3: ; %.continue
|
||||
; SI-NEXT: .LBB4_3: ; %.continue
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[14:15]
|
||||
; SI-NEXT: v_add_f32_e32 v0, v0, v0
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[12:13]
|
||||
; SI-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: s_branch BB4_5
|
||||
; SI-NEXT: BB4_4:
|
||||
; SI-NEXT: s_branch .LBB4_5
|
||||
; SI-NEXT: .LBB4_4:
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: exp null off, off, off, off done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
; SI-NEXT: BB4_5:
|
||||
; SI-NEXT: .LBB4_5:
|
||||
;
|
||||
; GFX9-LABEL: wqm_demote_2:
|
||||
; GFX9: ; %bb.0: ; %.entry
|
||||
|
@ -458,25 +458,25 @@ define amdgpu_ps <4 x float> @wqm_demote_2(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[14:15], vcc
|
||||
; GFX9-NEXT: s_xor_b64 s[14:15], exec, s[14:15]
|
||||
; GFX9-NEXT: s_cbranch_execz BB4_3
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB4_3
|
||||
; GFX9-NEXT: ; %bb.1: ; %.demote
|
||||
; GFX9-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB4_4
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB4_4
|
||||
; GFX9-NEXT: ; %bb.2: ; %.demote
|
||||
; GFX9-NEXT: s_wqm_b64 s[16:17], s[12:13]
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[16:17]
|
||||
; GFX9-NEXT: BB4_3: ; %.continue
|
||||
; GFX9-NEXT: .LBB4_3: ; %.continue
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[14:15]
|
||||
; GFX9-NEXT: v_add_f32_e32 v0, v0, v0
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
|
||||
; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_branch BB4_5
|
||||
; GFX9-NEXT: BB4_4:
|
||||
; GFX9-NEXT: s_branch .LBB4_5
|
||||
; GFX9-NEXT: .LBB4_4:
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: exp null off, off, off, off done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
; GFX9-NEXT: BB4_5:
|
||||
; GFX9-NEXT: .LBB4_5:
|
||||
;
|
||||
; GFX10-32-LABEL: wqm_demote_2:
|
||||
; GFX10-32: ; %bb.0: ; %.entry
|
||||
|
@ -487,25 +487,25 @@ define amdgpu_ps <4 x float> @wqm_demote_2(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; GFX10-32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
|
||||
; GFX10-32-NEXT: s_and_saveexec_b32 s13, vcc_lo
|
||||
; GFX10-32-NEXT: s_xor_b32 s13, exec_lo, s13
|
||||
; GFX10-32-NEXT: s_cbranch_execz BB4_3
|
||||
; GFX10-32-NEXT: s_cbranch_execz .LBB4_3
|
||||
; GFX10-32-NEXT: ; %bb.1: ; %.demote
|
||||
; GFX10-32-NEXT: s_andn2_b32 s12, s12, exec_lo
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 BB4_4
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 .LBB4_4
|
||||
; GFX10-32-NEXT: ; %bb.2: ; %.demote
|
||||
; GFX10-32-NEXT: s_wqm_b32 s14, s12
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s14
|
||||
; GFX10-32-NEXT: BB4_3: ; %.continue
|
||||
; GFX10-32-NEXT: .LBB4_3: ; %.continue
|
||||
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s13
|
||||
; GFX10-32-NEXT: v_add_f32_e32 v0, v0, v0
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
||||
; GFX10-32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
||||
; GFX10-32-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-32-NEXT: s_branch BB4_5
|
||||
; GFX10-32-NEXT: BB4_4:
|
||||
; GFX10-32-NEXT: s_branch .LBB4_5
|
||||
; GFX10-32-NEXT: .LBB4_4:
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
; GFX10-32-NEXT: BB4_5:
|
||||
; GFX10-32-NEXT: .LBB4_5:
|
||||
;
|
||||
; GFX10-64-LABEL: wqm_demote_2:
|
||||
; GFX10-64: ; %bb.0: ; %.entry
|
||||
|
@ -516,25 +516,25 @@ define amdgpu_ps <4 x float> @wqm_demote_2(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; GFX10-64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
||||
; GFX10-64-NEXT: s_and_saveexec_b64 s[14:15], vcc
|
||||
; GFX10-64-NEXT: s_xor_b64 s[14:15], exec, s[14:15]
|
||||
; GFX10-64-NEXT: s_cbranch_execz BB4_3
|
||||
; GFX10-64-NEXT: s_cbranch_execz .LBB4_3
|
||||
; GFX10-64-NEXT: ; %bb.1: ; %.demote
|
||||
; GFX10-64-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 BB4_4
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB4_4
|
||||
; GFX10-64-NEXT: ; %bb.2: ; %.demote
|
||||
; GFX10-64-NEXT: s_wqm_b64 s[16:17], s[12:13]
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[16:17]
|
||||
; GFX10-64-NEXT: BB4_3: ; %.continue
|
||||
; GFX10-64-NEXT: .LBB4_3: ; %.continue
|
||||
; GFX10-64-NEXT: s_or_b64 exec, exec, s[14:15]
|
||||
; GFX10-64-NEXT: v_add_f32_e32 v0, v0, v0
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[12:13]
|
||||
; GFX10-64-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
||||
; GFX10-64-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-64-NEXT: s_branch BB4_5
|
||||
; GFX10-64-NEXT: BB4_4:
|
||||
; GFX10-64-NEXT: s_branch .LBB4_5
|
||||
; GFX10-64-NEXT: .LBB4_4:
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
; GFX10-64-NEXT: BB4_5:
|
||||
; GFX10-64-NEXT: .LBB4_5:
|
||||
.entry:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
|
@ -563,7 +563,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
|
|||
; SI-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
|
||||
; SI-NEXT: s_xor_b64 s[14:15], vcc, exec
|
||||
; SI-NEXT: s_andn2_b64 s[12:13], s[12:13], s[14:15]
|
||||
; SI-NEXT: s_cbranch_scc0 BB5_2
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB5_2
|
||||
; SI-NEXT: ; %bb.1: ; %.entry
|
||||
; SI-NEXT: s_wqm_b64 s[14:15], s[12:13]
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[14:15]
|
||||
|
@ -571,12 +571,12 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
|
|||
; SI-NEXT: s_and_b64 exec, exec, s[12:13]
|
||||
; SI-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: s_branch BB5_3
|
||||
; SI-NEXT: BB5_2:
|
||||
; SI-NEXT: s_branch .LBB5_3
|
||||
; SI-NEXT: .LBB5_2:
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: exp null off, off, off, off done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
; SI-NEXT: BB5_3:
|
||||
; SI-NEXT: .LBB5_3:
|
||||
;
|
||||
; GFX9-LABEL: wqm_demote_dynamic:
|
||||
; GFX9: ; %bb.0: ; %.entry
|
||||
|
@ -587,7 +587,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
|
|||
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: s_xor_b64 s[14:15], vcc, exec
|
||||
; GFX9-NEXT: s_andn2_b64 s[12:13], s[12:13], s[14:15]
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB5_2
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB5_2
|
||||
; GFX9-NEXT: ; %bb.1: ; %.entry
|
||||
; GFX9-NEXT: s_wqm_b64 s[14:15], s[12:13]
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
|
||||
|
@ -595,12 +595,12 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
|
|||
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
|
||||
; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_branch BB5_3
|
||||
; GFX9-NEXT: BB5_2:
|
||||
; GFX9-NEXT: s_branch .LBB5_3
|
||||
; GFX9-NEXT: .LBB5_2:
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: exp null off, off, off, off done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
; GFX9-NEXT: BB5_3:
|
||||
; GFX9-NEXT: .LBB5_3:
|
||||
;
|
||||
; GFX10-32-LABEL: wqm_demote_dynamic:
|
||||
; GFX10-32: ; %bb.0: ; %.entry
|
||||
|
@ -611,7 +611,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
|
|||
; GFX10-32-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v0
|
||||
; GFX10-32-NEXT: s_xor_b32 s13, vcc_lo, exec_lo
|
||||
; GFX10-32-NEXT: s_andn2_b32 s12, s12, s13
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 BB5_2
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 .LBB5_2
|
||||
; GFX10-32-NEXT: ; %bb.1: ; %.entry
|
||||
; GFX10-32-NEXT: s_wqm_b32 s13, s12
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s13
|
||||
|
@ -619,12 +619,12 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
|
|||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
||||
; GFX10-32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
||||
; GFX10-32-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-32-NEXT: s_branch BB5_3
|
||||
; GFX10-32-NEXT: BB5_2:
|
||||
; GFX10-32-NEXT: s_branch .LBB5_3
|
||||
; GFX10-32-NEXT: .LBB5_2:
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
; GFX10-32-NEXT: BB5_3:
|
||||
; GFX10-32-NEXT: .LBB5_3:
|
||||
;
|
||||
; GFX10-64-LABEL: wqm_demote_dynamic:
|
||||
; GFX10-64: ; %bb.0: ; %.entry
|
||||
|
@ -635,7 +635,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
|
|||
; GFX10-64-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
|
||||
; GFX10-64-NEXT: s_xor_b64 s[14:15], vcc, exec
|
||||
; GFX10-64-NEXT: s_andn2_b64 s[12:13], s[12:13], s[14:15]
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 BB5_2
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB5_2
|
||||
; GFX10-64-NEXT: ; %bb.1: ; %.entry
|
||||
; GFX10-64-NEXT: s_wqm_b64 s[14:15], s[12:13]
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[14:15]
|
||||
|
@ -643,12 +643,12 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
|
|||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[12:13]
|
||||
; GFX10-64-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
||||
; GFX10-64-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-64-NEXT: s_branch BB5_3
|
||||
; GFX10-64-NEXT: BB5_2:
|
||||
; GFX10-64-NEXT: s_branch .LBB5_3
|
||||
; GFX10-64-NEXT: .LBB5_2:
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
; GFX10-64-NEXT: BB5_3:
|
||||
; GFX10-64-NEXT: .LBB5_3:
|
||||
.entry:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
|
@ -671,14 +671,14 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
|
|||
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||
; SI-NEXT: s_cbranch_execz BB6_3
|
||||
; SI-NEXT: s_cbranch_execz .LBB6_3
|
||||
; SI-NEXT: ; %bb.1: ; %.demote0
|
||||
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; SI-NEXT: s_cbranch_scc0 BB6_7
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB6_7
|
||||
; SI-NEXT: ; %bb.2: ; %.demote0
|
||||
; SI-NEXT: s_wqm_b64 s[4:5], s[0:1]
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[4:5]
|
||||
; SI-NEXT: BB6_3: ; %.continue0
|
||||
; SI-NEXT: .LBB6_3: ; %.continue0
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: s_mov_b64 s[2:3], s[0:1]
|
||||
; SI-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[2:3]
|
||||
|
@ -694,19 +694,19 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
|
|||
; SI-NEXT: s_or_b64 s[2:3], s[2:3], vcc
|
||||
; SI-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
|
||||
; SI-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
|
||||
; SI-NEXT: s_cbranch_execz BB6_6
|
||||
; SI-NEXT: s_cbranch_execz .LBB6_6
|
||||
; SI-NEXT: ; %bb.4: ; %.demote1
|
||||
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; SI-NEXT: s_cbranch_scc0 BB6_7
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB6_7
|
||||
; SI-NEXT: ; %bb.5: ; %.demote1
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: BB6_6: ; %.continue1
|
||||
; SI-NEXT: .LBB6_6: ; %.continue1
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: v_bfrev_b32_e32 v0, 60
|
||||
; SI-NEXT: v_mov_b32_e32 v1, 0x3c00
|
||||
; SI-NEXT: exp mrt0 v1, v1, v0, v0 done compr vm
|
||||
; SI-NEXT: s_endpgm
|
||||
; SI-NEXT: BB6_7:
|
||||
; SI-NEXT: .LBB6_7:
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: exp null off, off, off, off done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
|
@ -719,14 +719,14 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
|
|||
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX9-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||
; GFX9-NEXT: s_cbranch_execz BB6_3
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB6_3
|
||||
; GFX9-NEXT: ; %bb.1: ; %.demote0
|
||||
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB6_7
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB6_7
|
||||
; GFX9-NEXT: ; %bb.2: ; %.demote0
|
||||
; GFX9-NEXT: s_wqm_b64 s[4:5], s[0:1]
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: BB6_3: ; %.continue0
|
||||
; GFX9-NEXT: .LBB6_3: ; %.continue0
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[0:1]
|
||||
; GFX9-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[2:3]
|
||||
|
@ -742,19 +742,19 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
|
|||
; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], vcc
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
|
||||
; GFX9-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_execz BB6_6
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB6_6
|
||||
; GFX9-NEXT: ; %bb.4: ; %.demote1
|
||||
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB6_7
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB6_7
|
||||
; GFX9-NEXT: ; %bb.5: ; %.demote1
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: BB6_6: ; %.continue1
|
||||
; GFX9-NEXT: .LBB6_6: ; %.continue1
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3c00
|
||||
; GFX9-NEXT: v_bfrev_b32_e32 v1, 60
|
||||
; GFX9-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
; GFX9-NEXT: BB6_7:
|
||||
; GFX9-NEXT: .LBB6_7:
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: exp null off, off, off, off done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
|
@ -767,14 +767,14 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
|
|||
; GFX10-32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
|
||||
; GFX10-32-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX10-32-NEXT: s_xor_b32 s1, exec_lo, s1
|
||||
; GFX10-32-NEXT: s_cbranch_execz BB6_3
|
||||
; GFX10-32-NEXT: s_cbranch_execz .LBB6_3
|
||||
; GFX10-32-NEXT: ; %bb.1: ; %.demote0
|
||||
; GFX10-32-NEXT: s_andn2_b32 s0, s0, exec_lo
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 BB6_7
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 .LBB6_7
|
||||
; GFX10-32-NEXT: ; %bb.2: ; %.demote0
|
||||
; GFX10-32-NEXT: s_wqm_b32 s2, s0
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-32-NEXT: BB6_3: ; %.continue0
|
||||
; GFX10-32-NEXT: .LBB6_3: ; %.continue0
|
||||
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s1
|
||||
; GFX10-32-NEXT: s_mov_b32 s1, s0
|
||||
; GFX10-32-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s1
|
||||
|
@ -788,19 +788,19 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
|
|||
; GFX10-32-NEXT: s_or_b32 s1, s1, vcc_lo
|
||||
; GFX10-32-NEXT: s_and_saveexec_b32 s2, s1
|
||||
; GFX10-32-NEXT: s_xor_b32 s1, exec_lo, s2
|
||||
; GFX10-32-NEXT: s_cbranch_execz BB6_6
|
||||
; GFX10-32-NEXT: s_cbranch_execz .LBB6_6
|
||||
; GFX10-32-NEXT: ; %bb.4: ; %.demote1
|
||||
; GFX10-32-NEXT: s_andn2_b32 s0, s0, exec_lo
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 BB6_7
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 .LBB6_7
|
||||
; GFX10-32-NEXT: ; %bb.5: ; %.demote1
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: BB6_6: ; %.continue1
|
||||
; GFX10-32-NEXT: .LBB6_6: ; %.continue1
|
||||
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s1
|
||||
; GFX10-32-NEXT: v_mov_b32_e32 v0, 0x3c00
|
||||
; GFX10-32-NEXT: v_bfrev_b32_e32 v1, 60
|
||||
; GFX10-32-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
; GFX10-32-NEXT: BB6_7:
|
||||
; GFX10-32-NEXT: .LBB6_7:
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
|
@ -813,14 +813,14 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
|
|||
; GFX10-64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; GFX10-64-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX10-64-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||
; GFX10-64-NEXT: s_cbranch_execz BB6_3
|
||||
; GFX10-64-NEXT: s_cbranch_execz .LBB6_3
|
||||
; GFX10-64-NEXT: ; %bb.1: ; %.demote0
|
||||
; GFX10-64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 BB6_7
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB6_7
|
||||
; GFX10-64-NEXT: ; %bb.2: ; %.demote0
|
||||
; GFX10-64-NEXT: s_wqm_b64 s[4:5], s[0:1]
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[4:5]
|
||||
; GFX10-64-NEXT: BB6_3: ; %.continue0
|
||||
; GFX10-64-NEXT: .LBB6_3: ; %.continue0
|
||||
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX10-64-NEXT: s_mov_b64 s[2:3], s[0:1]
|
||||
; GFX10-64-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[2:3]
|
||||
|
@ -834,19 +834,19 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
|
|||
; GFX10-64-NEXT: s_or_b64 s[2:3], s[2:3], vcc
|
||||
; GFX10-64-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
|
||||
; GFX10-64-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
|
||||
; GFX10-64-NEXT: s_cbranch_execz BB6_6
|
||||
; GFX10-64-NEXT: s_cbranch_execz .LBB6_6
|
||||
; GFX10-64-NEXT: ; %bb.4: ; %.demote1
|
||||
; GFX10-64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 BB6_7
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB6_7
|
||||
; GFX10-64-NEXT: ; %bb.5: ; %.demote1
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: BB6_6: ; %.continue1
|
||||
; GFX10-64-NEXT: .LBB6_6: ; %.continue1
|
||||
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX10-64-NEXT: v_mov_b32_e32 v0, 0x3c00
|
||||
; GFX10-64-NEXT: v_bfrev_b32_e32 v1, 60
|
||||
; GFX10-64-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
; GFX10-64-NEXT: BB6_7:
|
||||
; GFX10-64-NEXT: .LBB6_7:
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
|
@ -895,26 +895,26 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
|||
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||
; SI-NEXT: s_cbranch_execz BB7_3
|
||||
; SI-NEXT: s_cbranch_execz .LBB7_3
|
||||
; SI-NEXT: ; %bb.1: ; %.demote0
|
||||
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; SI-NEXT: s_cbranch_scc0 BB7_9
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; SI-NEXT: ; %bb.2: ; %.demote0
|
||||
; SI-NEXT: s_wqm_b64 s[4:5], s[0:1]
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[4:5]
|
||||
; SI-NEXT: BB7_3: ; %.continue0.preheader
|
||||
; SI-NEXT: .LBB7_3: ; %.continue0.preheader
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: s_mov_b64 s[2:3], 0
|
||||
; SI-NEXT: s_branch BB7_5
|
||||
; SI-NEXT: BB7_4: ; %.continue1
|
||||
; SI-NEXT: s_branch .LBB7_5
|
||||
; SI-NEXT: .LBB7_4: ; %.continue1
|
||||
; SI-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; SI-NEXT: s_add_i32 s6, s6, 1
|
||||
; SI-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
|
||||
; SI-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; SI-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: s_cbranch_execz BB7_8
|
||||
; SI-NEXT: BB7_5: ; %.continue0
|
||||
; SI-NEXT: s_cbranch_execz .LBB7_8
|
||||
; SI-NEXT: .LBB7_5: ; %.continue0
|
||||
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; SI-NEXT: v_mov_b32_e32 v0, s6
|
||||
; SI-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
|
@ -930,24 +930,24 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
|||
; SI-NEXT: s_or_b64 s[4:5], s[4:5], vcc
|
||||
; SI-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
|
||||
; SI-NEXT: s_xor_b64 s[4:5], exec, s[8:9]
|
||||
; SI-NEXT: s_cbranch_execz BB7_4
|
||||
; SI-NEXT: s_cbranch_execz .LBB7_4
|
||||
; SI-NEXT: ; %bb.6: ; %.demote1
|
||||
; SI-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; SI-NEXT: s_cbranch_scc0 BB7_9
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; SI-NEXT: ; %bb.7: ; %.demote1
|
||||
; SI-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; SI-NEXT: s_wqm_b64 s[8:9], s[0:1]
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[8:9]
|
||||
; SI-NEXT: s_branch BB7_4
|
||||
; SI-NEXT: BB7_8: ; %.return
|
||||
; SI-NEXT: s_branch .LBB7_4
|
||||
; SI-NEXT: .LBB7_8: ; %.return
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[0:1]
|
||||
; SI-NEXT: v_bfrev_b32_e32 v0, 60
|
||||
; SI-NEXT: v_mov_b32_e32 v1, 0x3c00
|
||||
; SI-NEXT: exp mrt0 v1, v1, v0, v0 done compr vm
|
||||
; SI-NEXT: s_endpgm
|
||||
; SI-NEXT: BB7_9:
|
||||
; SI-NEXT: .LBB7_9:
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: exp null off, off, off, off done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
|
@ -961,26 +961,26 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
|||
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX9-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||
; GFX9-NEXT: s_cbranch_execz BB7_3
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB7_3
|
||||
; GFX9-NEXT: ; %bb.1: ; %.demote0
|
||||
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB7_9
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; GFX9-NEXT: ; %bb.2: ; %.demote0
|
||||
; GFX9-NEXT: s_wqm_b64 s[4:5], s[0:1]
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: BB7_3: ; %.continue0.preheader
|
||||
; GFX9-NEXT: .LBB7_3: ; %.continue0.preheader
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], 0
|
||||
; GFX9-NEXT: s_branch BB7_5
|
||||
; GFX9-NEXT: BB7_4: ; %.continue1
|
||||
; GFX9-NEXT: s_branch .LBB7_5
|
||||
; GFX9-NEXT: .LBB7_4: ; %.continue1
|
||||
; GFX9-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_add_i32 s6, s6, 1
|
||||
; GFX9-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
|
||||
; GFX9-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX9-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX9-NEXT: s_cbranch_execz BB7_8
|
||||
; GFX9-NEXT: BB7_5: ; %.continue0
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB7_8
|
||||
; GFX9-NEXT: .LBB7_5: ; %.continue0
|
||||
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, s6
|
||||
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
|
@ -996,24 +996,24 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
|||
; GFX9-NEXT: s_or_b64 s[4:5], s[4:5], vcc
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
|
||||
; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[8:9]
|
||||
; GFX9-NEXT: s_cbranch_execz BB7_4
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB7_4
|
||||
; GFX9-NEXT: ; %bb.6: ; %.demote1
|
||||
; GFX9-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB7_9
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; GFX9-NEXT: ; %bb.7: ; %.demote1
|
||||
; GFX9-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX9-NEXT: s_wqm_b64 s[8:9], s[0:1]
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[8:9]
|
||||
; GFX9-NEXT: s_branch BB7_4
|
||||
; GFX9-NEXT: BB7_8: ; %.return
|
||||
; GFX9-NEXT: s_branch .LBB7_4
|
||||
; GFX9-NEXT: .LBB7_8: ; %.return
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[0:1]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3c00
|
||||
; GFX9-NEXT: v_bfrev_b32_e32 v1, 60
|
||||
; GFX9-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
; GFX9-NEXT: BB7_9:
|
||||
; GFX9-NEXT: .LBB7_9:
|
||||
; GFX9-NEXT: s_mov_b64 exec, 0
|
||||
; GFX9-NEXT: exp null off, off, off, off done vm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
|
@ -1027,26 +1027,26 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
|||
; GFX10-32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
|
||||
; GFX10-32-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX10-32-NEXT: s_xor_b32 s1, exec_lo, s1
|
||||
; GFX10-32-NEXT: s_cbranch_execz BB7_3
|
||||
; GFX10-32-NEXT: s_cbranch_execz .LBB7_3
|
||||
; GFX10-32-NEXT: ; %bb.1: ; %.demote0
|
||||
; GFX10-32-NEXT: s_andn2_b32 s0, s0, exec_lo
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 BB7_9
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; GFX10-32-NEXT: ; %bb.2: ; %.demote0
|
||||
; GFX10-32-NEXT: s_wqm_b32 s3, s0
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s3
|
||||
; GFX10-32-NEXT: BB7_3: ; %.continue0.preheader
|
||||
; GFX10-32-NEXT: .LBB7_3: ; %.continue0.preheader
|
||||
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s1
|
||||
; GFX10-32-NEXT: s_mov_b32 s3, 0
|
||||
; GFX10-32-NEXT: s_branch BB7_5
|
||||
; GFX10-32-NEXT: BB7_4: ; %.continue1
|
||||
; GFX10-32-NEXT: s_branch .LBB7_5
|
||||
; GFX10-32-NEXT: .LBB7_4: ; %.continue1
|
||||
; GFX10-32-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s1
|
||||
; GFX10-32-NEXT: s_add_i32 s3, s3, 1
|
||||
; GFX10-32-NEXT: v_cmp_ge_i32_e32 vcc_lo, s3, v1
|
||||
; GFX10-32-NEXT: s_or_b32 s2, vcc_lo, s2
|
||||
; GFX10-32-NEXT: s_andn2_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-32-NEXT: s_cbranch_execz BB7_8
|
||||
; GFX10-32-NEXT: BB7_5: ; %.continue0
|
||||
; GFX10-32-NEXT: s_cbranch_execz .LBB7_8
|
||||
; GFX10-32-NEXT: .LBB7_5: ; %.continue0
|
||||
; GFX10-32-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-32-NEXT: s_mov_b32 s1, s0
|
||||
; GFX10-32-NEXT: v_cndmask_b32_e64 v0, s3, 0, s1
|
||||
|
@ -1059,24 +1059,24 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
|||
; GFX10-32-NEXT: s_or_b32 s1, s1, vcc_lo
|
||||
; GFX10-32-NEXT: s_and_saveexec_b32 s4, s1
|
||||
; GFX10-32-NEXT: s_xor_b32 s1, exec_lo, s4
|
||||
; GFX10-32-NEXT: s_cbranch_execz BB7_4
|
||||
; GFX10-32-NEXT: s_cbranch_execz .LBB7_4
|
||||
; GFX10-32-NEXT: ; %bb.6: ; %.demote1
|
||||
; GFX10-32-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX10-32-NEXT: s_andn2_b32 s0, s0, exec_lo
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 BB7_9
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; GFX10-32-NEXT: ; %bb.7: ; %.demote1
|
||||
; GFX10-32-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX10-32-NEXT: s_wqm_b32 s4, s0
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s4
|
||||
; GFX10-32-NEXT: s_branch BB7_4
|
||||
; GFX10-32-NEXT: BB7_8: ; %.return
|
||||
; GFX10-32-NEXT: s_branch .LBB7_4
|
||||
; GFX10-32-NEXT: .LBB7_8: ; %.return
|
||||
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s0
|
||||
; GFX10-32-NEXT: v_mov_b32_e32 v0, 0x3c00
|
||||
; GFX10-32-NEXT: v_bfrev_b32_e32 v1, 60
|
||||
; GFX10-32-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
; GFX10-32-NEXT: BB7_9:
|
||||
; GFX10-32-NEXT: .LBB7_9:
|
||||
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
|
||||
; GFX10-32-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-32-NEXT: s_endpgm
|
||||
|
@ -1090,26 +1090,26 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
|||
; GFX10-64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; GFX10-64-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX10-64-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||
; GFX10-64-NEXT: s_cbranch_execz BB7_3
|
||||
; GFX10-64-NEXT: s_cbranch_execz .LBB7_3
|
||||
; GFX10-64-NEXT: ; %bb.1: ; %.demote0
|
||||
; GFX10-64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 BB7_9
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; GFX10-64-NEXT: ; %bb.2: ; %.demote0
|
||||
; GFX10-64-NEXT: s_wqm_b64 s[4:5], s[0:1]
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[4:5]
|
||||
; GFX10-64-NEXT: BB7_3: ; %.continue0.preheader
|
||||
; GFX10-64-NEXT: .LBB7_3: ; %.continue0.preheader
|
||||
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX10-64-NEXT: s_mov_b64 s[2:3], 0
|
||||
; GFX10-64-NEXT: s_branch BB7_5
|
||||
; GFX10-64-NEXT: BB7_4: ; %.continue1
|
||||
; GFX10-64-NEXT: s_branch .LBB7_5
|
||||
; GFX10-64-NEXT: .LBB7_4: ; %.continue1
|
||||
; GFX10-64-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX10-64-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX10-64-NEXT: s_add_i32 s6, s6, 1
|
||||
; GFX10-64-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
|
||||
; GFX10-64-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX10-64-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX10-64-NEXT: s_cbranch_execz BB7_8
|
||||
; GFX10-64-NEXT: BB7_5: ; %.continue0
|
||||
; GFX10-64-NEXT: s_cbranch_execz .LBB7_8
|
||||
; GFX10-64-NEXT: .LBB7_5: ; %.continue0
|
||||
; GFX10-64-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-64-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX10-64-NEXT: v_cndmask_b32_e64 v0, s6, 0, s[4:5]
|
||||
|
@ -1122,24 +1122,24 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
|||
; GFX10-64-NEXT: s_or_b64 s[4:5], s[4:5], vcc
|
||||
; GFX10-64-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
|
||||
; GFX10-64-NEXT: s_xor_b64 s[4:5], exec, s[8:9]
|
||||
; GFX10-64-NEXT: s_cbranch_execz BB7_4
|
||||
; GFX10-64-NEXT: s_cbranch_execz .LBB7_4
|
||||
; GFX10-64-NEXT: ; %bb.6: ; %.demote1
|
||||
; GFX10-64-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX10-64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 BB7_9
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; GFX10-64-NEXT: ; %bb.7: ; %.demote1
|
||||
; GFX10-64-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX10-64-NEXT: s_wqm_b64 s[8:9], s[0:1]
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[8:9]
|
||||
; GFX10-64-NEXT: s_branch BB7_4
|
||||
; GFX10-64-NEXT: BB7_8: ; %.return
|
||||
; GFX10-64-NEXT: s_branch .LBB7_4
|
||||
; GFX10-64-NEXT: .LBB7_8: ; %.return
|
||||
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[0:1]
|
||||
; GFX10-64-NEXT: v_mov_b32_e32 v0, 0x3c00
|
||||
; GFX10-64-NEXT: v_bfrev_b32_e32 v1, 60
|
||||
; GFX10-64-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
; GFX10-64-NEXT: BB7_9:
|
||||
; GFX10-64-NEXT: .LBB7_9:
|
||||
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
||||
; GFX10-64-NEXT: exp null off, off, off, off done vm
|
||||
; GFX10-64-NEXT: s_endpgm
|
||||
|
|
|
@ -29,14 +29,14 @@ define amdgpu_kernel void @local_stack_offset_uses_sp(i64 addrspace(1)* %out) {
|
|||
; MUBUF-NEXT: s_mov_b32 s6, 0
|
||||
; MUBUF-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: BB0_1: ; %loadstoreloop
|
||||
; MUBUF-NEXT: .LBB0_1: ; %loadstoreloop
|
||||
; MUBUF-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; MUBUF-NEXT: v_add_u32_e32 v3, s6, v1
|
||||
; MUBUF-NEXT: s_add_i32 s6, s6, 1
|
||||
; MUBUF-NEXT: s_cmpk_lt_u32 s6, 0x2120
|
||||
; MUBUF-NEXT: buffer_store_byte v2, v3, s[0:3], 0 offen
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: s_cbranch_scc1 BB0_1
|
||||
; MUBUF-NEXT: s_cbranch_scc1 .LBB0_1
|
||||
; MUBUF-NEXT: ; %bb.2: ; %split
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x3000
|
||||
; MUBUF-NEXT: v_add_u32_e32 v1, 0x20d0, v1
|
||||
|
@ -66,14 +66,14 @@ define amdgpu_kernel void @local_stack_offset_uses_sp(i64 addrspace(1)* %out) {
|
|||
; FLATSCR-NEXT: s_mov_b32 s2, 0
|
||||
; FLATSCR-NEXT: scratch_store_dword off, v0, vcc_hi
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: BB0_1: ; %loadstoreloop
|
||||
; FLATSCR-NEXT: .LBB0_1: ; %loadstoreloop
|
||||
; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; FLATSCR-NEXT: s_add_i32 s3, s2, 0x3000
|
||||
; FLATSCR-NEXT: s_add_i32 s2, s2, 1
|
||||
; FLATSCR-NEXT: s_cmpk_lt_u32 s2, 0x2120
|
||||
; FLATSCR-NEXT: scratch_store_byte off, v0, s3
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: s_cbranch_scc1 BB0_1
|
||||
; FLATSCR-NEXT: s_cbranch_scc1 .LBB0_1
|
||||
; FLATSCR-NEXT: ; %bb.2: ; %split
|
||||
; FLATSCR-NEXT: s_movk_i32 s2, 0x2000
|
||||
; FLATSCR-NEXT: s_addk_i32 s2, 0x3000
|
||||
|
@ -119,14 +119,14 @@ define void @func_local_stack_offset_uses_sp(i64 addrspace(1)* %out) {
|
|||
; MUBUF-NEXT: s_add_i32 s32, s32, 0x180000
|
||||
; MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], s33
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: BB1_1: ; %loadstoreloop
|
||||
; MUBUF-NEXT: .LBB1_1: ; %loadstoreloop
|
||||
; MUBUF-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; MUBUF-NEXT: v_add_u32_e32 v5, s4, v3
|
||||
; MUBUF-NEXT: s_add_i32 s4, s4, 1
|
||||
; MUBUF-NEXT: s_cmpk_lt_u32 s4, 0x2120
|
||||
; MUBUF-NEXT: buffer_store_byte v4, v5, s[0:3], 0 offen
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: s_cbranch_scc1 BB1_1
|
||||
; MUBUF-NEXT: s_cbranch_scc1 .LBB1_1
|
||||
; MUBUF-NEXT: ; %bb.2: ; %split
|
||||
; MUBUF-NEXT: v_lshrrev_b32_e64 v3, 6, s33
|
||||
; MUBUF-NEXT: v_add_u32_e32 v3, 0x1000, v3
|
||||
|
@ -158,7 +158,7 @@ define void @func_local_stack_offset_uses_sp(i64 addrspace(1)* %out) {
|
|||
; FLATSCR-NEXT: s_addk_i32 s32, 0x6000
|
||||
; FLATSCR-NEXT: scratch_store_dword off, v2, s33
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: BB1_1: ; %loadstoreloop
|
||||
; FLATSCR-NEXT: .LBB1_1: ; %loadstoreloop
|
||||
; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; FLATSCR-NEXT: s_add_i32 vcc_hi, s33, 0x1000
|
||||
; FLATSCR-NEXT: s_add_i32 s1, s0, vcc_hi
|
||||
|
@ -166,7 +166,7 @@ define void @func_local_stack_offset_uses_sp(i64 addrspace(1)* %out) {
|
|||
; FLATSCR-NEXT: s_cmpk_lt_u32 s0, 0x2120
|
||||
; FLATSCR-NEXT: scratch_store_byte off, v2, s1
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: s_cbranch_scc1 BB1_1
|
||||
; FLATSCR-NEXT: s_cbranch_scc1 .LBB1_1
|
||||
; FLATSCR-NEXT: ; %bb.2: ; %split
|
||||
; FLATSCR-NEXT: s_movk_i32 s0, 0x2000
|
||||
; FLATSCR-NEXT: s_add_i32 s1, s33, 0x1000
|
||||
|
@ -210,14 +210,14 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(<3 x i64> addrspace(1
|
|||
; MUBUF-NEXT: s_mov_b32 s6, 0
|
||||
; MUBUF-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: BB2_1: ; %loadstoreloop
|
||||
; MUBUF-NEXT: .LBB2_1: ; %loadstoreloop
|
||||
; MUBUF-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; MUBUF-NEXT: v_add_u32_e32 v2, s6, v0
|
||||
; MUBUF-NEXT: s_add_i32 s6, s6, 1
|
||||
; MUBUF-NEXT: s_cmpk_lt_u32 s6, 0x2120
|
||||
; MUBUF-NEXT: buffer_store_byte v1, v2, s[0:3], 0 offen
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: s_cbranch_scc1 BB2_1
|
||||
; MUBUF-NEXT: s_cbranch_scc1 .LBB2_1
|
||||
; MUBUF-NEXT: ; %bb.2: ; %split
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v0, 0x4000
|
||||
; MUBUF-NEXT: v_or_b32_e32 v2, 0x12d4, v0
|
||||
|
@ -280,14 +280,14 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(<3 x i64> addrspace(1
|
|||
; FLATSCR-NEXT: s_mov_b32 s2, 0
|
||||
; FLATSCR-NEXT: scratch_store_dword off, v0, vcc_hi offset:1024
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: BB2_1: ; %loadstoreloop
|
||||
; FLATSCR-NEXT: .LBB2_1: ; %loadstoreloop
|
||||
; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; FLATSCR-NEXT: s_add_i32 s3, s2, 0x2000
|
||||
; FLATSCR-NEXT: s_add_i32 s2, s2, 1
|
||||
; FLATSCR-NEXT: s_cmpk_lt_u32 s2, 0x2120
|
||||
; FLATSCR-NEXT: scratch_store_byte off, v0, s3
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: s_cbranch_scc1 BB2_1
|
||||
; FLATSCR-NEXT: s_cbranch_scc1 .LBB2_1
|
||||
; FLATSCR-NEXT: ; %bb.2: ; %split
|
||||
; FLATSCR-NEXT: s_movk_i32 s2, 0x1000
|
||||
; FLATSCR-NEXT: s_addk_i32 s2, 0x2000
|
||||
|
|
|
@ -12,22 +12,22 @@ define <3 x float> @liveout_undef_subrange(<3 x float> %arg) {
|
|||
; CHECK-NEXT: v_add_f32_e32 v3, v2, v2
|
||||
; CHECK-NEXT: v_add_f32_e32 v1, v1, v1
|
||||
; CHECK-NEXT: v_add_f32_e32 v0, v0, v0
|
||||
; CHECK-NEXT: BB0_1: ; %bb1
|
||||
; CHECK-NEXT: .LBB0_1: ; %bb1
|
||||
; CHECK-NEXT: ; =>This Loop Header: Depth=1
|
||||
; CHECK-NEXT: ; Child Loop BB0_2 Depth 2
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], 0
|
||||
; CHECK-NEXT: BB0_2: ; %bb1
|
||||
; CHECK-NEXT: .LBB0_2: ; %bb1
|
||||
; CHECK-NEXT: ; Parent Loop BB0_1 Depth=1
|
||||
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
|
||||
; CHECK-NEXT: v_cmp_neq_f32_e32 vcc, 0, v2
|
||||
; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execnz BB0_2
|
||||
; CHECK-NEXT: s_cbranch_execnz .LBB0_2
|
||||
; CHECK-NEXT: ; %bb.3: ; %bb2
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: v_mul_f32_e32 v2, v3, v2
|
||||
; CHECK-NEXT: s_branch BB0_1
|
||||
; CHECK-NEXT: s_branch .LBB0_1
|
||||
bb:
|
||||
br label %bb1
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs -asm-verbose=0 < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10-ASM %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s -filetype=obj | llvm-objdump -d --arch-name=amdgcn --mcpu=gfx1030 - | FileCheck --check-prefixes=GCN,GFX10,GFX10-DIS %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s -filetype=obj | llvm-objdump -d --arch-name=amdgcn --mcpu=gfx1030 --symbolize-operands - | FileCheck --check-prefixes=GCN,GFX10,GFX10-DIS %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=GFX8 %s
|
||||
|
||||
; GFX8-NOT: s_inst_prefetch
|
||||
|
@ -8,8 +8,8 @@
|
|||
; GCN-LABEL: test_loop_64
|
||||
; GFX10: s_movk_i32 s{{[0-9]+}}, 0x400
|
||||
; GFX10-DIS-NEXT: {{^$}}
|
||||
; GFX10-ASM-NEXT: [[L1:BB[0-9_]+]]:
|
||||
; GFX10-DIS-NEXT: <[[L1:BB[0-9_]+]]>:
|
||||
; GFX10-ASM-NEXT: [[L1:.LBB[0-9_]+]]:
|
||||
; GFX10-DIS-NEXT: <[[L1:L[0-9]+]]>:
|
||||
; GFX10: s_sleep 0
|
||||
; GFX10: s_cbranch_scc0 [[L1]]
|
||||
; GFX10-NEXT: s_endpgm
|
||||
|
@ -33,8 +33,8 @@ bb2: ; preds = %bb2, %bb
|
|||
; GFX10-ASM-NEXT: .p2align 6
|
||||
; GFX10-DIS-NEXT: s_nop 0
|
||||
; GFX10-NOT: s_inst_prefetch
|
||||
; GFX10-ASM: [[L1:BB[0-9_]+]]:
|
||||
; GFX10-DIS: <[[L1:BB[0-9_]+]]>:
|
||||
; GFX10-ASM: [[L1:.LBB[0-9_]+]]:
|
||||
; GFX10-DIS: <[[L1:L[0-9]+]]>:
|
||||
; GFX10: s_sleep 0
|
||||
; GFX10: s_cbranch_scc0 [[L1]]
|
||||
; GFX10-NEXT: s_endpgm
|
||||
|
@ -74,8 +74,8 @@ bb2: ; preds = %bb2, %bb
|
|||
; GFX10-ASM-NEXT: .p2align 6
|
||||
; GFX10-DIS-NEXT: s_nop 0
|
||||
; GFX10-NOT: s_inst_prefetch
|
||||
; GFX10-ASM: [[L1:BB[0-9_]+]]:
|
||||
; GFX10-DIS: <[[L1:BB[0-9_]+]]>:
|
||||
; GFX10-ASM: [[L1:.LBB[0-9_]+]]:
|
||||
; GFX10-DIS: <[[L1:L[0-9]+]]>:
|
||||
; GFX10: s_sleep 0
|
||||
; GFX10: s_cbranch_scc0 [[L1]]
|
||||
; GFX10-NEXT: s_inst_prefetch 0x2
|
||||
|
@ -131,8 +131,8 @@ bb2: ; preds = %bb2, %bb
|
|||
; GCN-LABEL: test_loop_256
|
||||
; GFX10: s_movk_i32 s{{[0-9]+}}, 0x400
|
||||
; GFX10-DIS-NEXT: {{^$}}
|
||||
; GFX10-ASM-NEXT: [[L1:BB[0-9_]+]]:
|
||||
; GFX10-DIS-NEXT: <[[L1:BB[0-9_]+]]>:
|
||||
; GFX10-ASM-NEXT: [[L1:.LBB[0-9_]+]]:
|
||||
; GFX10-DIS-NEXT: <[[L1:L[0-9]+]]>:
|
||||
; GFX10: s_sleep 0
|
||||
; GFX10: s_cbranch_scc0 [[L1]]
|
||||
; GFX10-NEXT: s_endpgm
|
||||
|
@ -205,14 +205,14 @@ bb2: ; preds = %bb2, %bb
|
|||
; GFX10-ASM-NEXT: .p2align 6
|
||||
; GFX10-DIS-NEXT: s_nop 0
|
||||
; GFX10-NOT: s_inst_prefetch
|
||||
; GFX10-ASM: [[L1:BB[0-9_]+]]:
|
||||
; GFX10-DIS: <[[L1:BB[0-9_]+]]>:
|
||||
; GFX10-ASM: [[L1:.LBB[0-9_]+]]:
|
||||
; GFX10-DIS: <[[L1:L[0-9]+]]>:
|
||||
; GFX10-NOT: s_inst_prefetch
|
||||
; GFX10-ASM: .p2align 6
|
||||
; GFX10-DIS: s_nop 0
|
||||
; GFX10-NOT: s_inst_prefetch
|
||||
; GFX10-ASM: [[L2:BB[0-9_]+]]:
|
||||
; GFX10-DIS: <[[L2:BB[0-9_]+]]>:
|
||||
; GFX10-ASM: [[L2:.LBB[0-9_]+]]:
|
||||
; GFX10-DIS: <[[L2:L[0-9]+]]>:
|
||||
; GFX10-NOT: s_inst_prefetch
|
||||
; GFX10: s_sleep 0
|
||||
; GFX10: s_cbranch_scc{{[01]}} [[L2]]
|
||||
|
@ -281,20 +281,20 @@ bb4:
|
|||
; GFX10-NOT: .p2align 6
|
||||
; GFX10-NOT: s_nop
|
||||
; GFX10-NOT: s_inst_prefetch
|
||||
; GFX10-ASM: [[L0:BB[0-9_]+]]:
|
||||
; GFX10-DIS: <[[L0:BB[0-9_]+]]>:
|
||||
; GFX10-ASM: [[L0:.LBB[0-9_]+]]:
|
||||
; GFX10-DIS: <[[L0:L[0-9]+]]>:
|
||||
; GFX10: s_inst_prefetch 0x1
|
||||
; GFX10-ASM-NEXT: .p2align 6
|
||||
; GFX10-DIS-NEXT: s_nop 0
|
||||
; GFX10-NOT: s_inst_prefetch
|
||||
; GFX10-ASM: [[L1:BB[0-9_]+]]:
|
||||
; GFX10-DIS: <[[L1:BB[0-9_]+]]>:
|
||||
; GFX10-ASM: [[L1:.LBB[0-9_]+]]:
|
||||
; GFX10-DIS: <[[L1:L[0-9]+]]>:
|
||||
; GFX10-NOT: s_inst_prefetch
|
||||
; GFX10-ASM: .p2align 6
|
||||
; GFX10-DIS: s_nop 0
|
||||
; GFX10-NOT: s_inst_prefetch
|
||||
; GFX10-ASM: [[L2:BB[0-9_]+]]:
|
||||
; GFX10-DIS: <[[L2:BB[0-9_]+]]>:
|
||||
; GFX10-ASM: [[L2:.LBB[0-9_]+]]:
|
||||
; GFX10-DIS: <[[L2:L[0-9]+]]>:
|
||||
; GFX10-NOT: s_inst_prefetch
|
||||
; GFX10: s_sleep 0
|
||||
; GFX10: s_cbranch_scc{{[01]}} [[L2]]
|
||||
|
|
|
@ -39,12 +39,12 @@ define amdgpu_kernel void @break_loop(i32 %arg) #0 {
|
|||
; GCN-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
|
||||
; GCN-NEXT: ; implicit-def: $sgpr6
|
||||
; GCN-NEXT: BB0_1: ; %bb1
|
||||
; GCN-NEXT: .LBB0_1: ; %bb1
|
||||
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: s_add_i32 s6, s6, 1
|
||||
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
|
||||
; GCN-NEXT: s_cmp_gt_i32 s6, -1
|
||||
; GCN-NEXT: s_cbranch_scc1 BB0_3
|
||||
; GCN-NEXT: s_cbranch_scc1 .LBB0_3
|
||||
; GCN-NEXT: ; %bb.2: ; %bb4
|
||||
; GCN-NEXT: ; in Loop: Header=BB0_1 Depth=1
|
||||
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
|
||||
|
@ -53,12 +53,12 @@ define amdgpu_kernel void @break_loop(i32 %arg) #0 {
|
|||
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
|
||||
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
|
||||
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
|
||||
; GCN-NEXT: BB0_3: ; %Flow
|
||||
; GCN-NEXT: .LBB0_3: ; %Flow
|
||||
; GCN-NEXT: ; in Loop: Header=BB0_1 Depth=1
|
||||
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
|
||||
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
|
||||
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
|
||||
; GCN-NEXT: s_cbranch_execnz BB0_1
|
||||
; GCN-NEXT: s_cbranch_execnz .LBB0_1
|
||||
; GCN-NEXT: ; %bb.4: ; %bb9
|
||||
; GCN-NEXT: s_endpgm
|
||||
bb:
|
||||
|
@ -117,11 +117,11 @@ define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
|
|||
; GCN-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
|
||||
; GCN-NEXT: ; implicit-def: $sgpr6
|
||||
; GCN-NEXT: BB1_1: ; %bb1
|
||||
; GCN-NEXT: .LBB1_1: ; %bb1
|
||||
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
|
||||
; GCN-NEXT: s_cmp_gt_i32 s6, -1
|
||||
; GCN-NEXT: s_cbranch_scc1 BB1_3
|
||||
; GCN-NEXT: s_cbranch_scc1 .LBB1_3
|
||||
; GCN-NEXT: ; %bb.2: ; %bb4
|
||||
; GCN-NEXT: ; in Loop: Header=BB1_1 Depth=1
|
||||
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
|
||||
|
@ -130,13 +130,13 @@ define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
|
|||
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
|
||||
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
|
||||
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
|
||||
; GCN-NEXT: BB1_3: ; %Flow
|
||||
; GCN-NEXT: .LBB1_3: ; %Flow
|
||||
; GCN-NEXT: ; in Loop: Header=BB1_1 Depth=1
|
||||
; GCN-NEXT: s_add_i32 s6, s6, 1
|
||||
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
|
||||
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
|
||||
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
|
||||
; GCN-NEXT: s_cbranch_execnz BB1_1
|
||||
; GCN-NEXT: s_cbranch_execnz .LBB1_1
|
||||
; GCN-NEXT: ; %bb.4: ; %bb9
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 7
|
||||
|
@ -208,11 +208,11 @@ define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
|
|||
; GCN-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
|
||||
; GCN-NEXT: ; implicit-def: $sgpr6
|
||||
; GCN-NEXT: BB2_1: ; %bb1
|
||||
; GCN-NEXT: .LBB2_1: ; %bb1
|
||||
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
|
||||
; GCN-NEXT: s_cmp_gt_i32 s6, -1
|
||||
; GCN-NEXT: s_cbranch_scc1 BB2_3
|
||||
; GCN-NEXT: s_cbranch_scc1 .LBB2_3
|
||||
; GCN-NEXT: ; %bb.2: ; %bb4
|
||||
; GCN-NEXT: ; in Loop: Header=BB2_1 Depth=1
|
||||
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
|
||||
|
@ -221,13 +221,13 @@ define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
|
|||
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
|
||||
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
|
||||
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
|
||||
; GCN-NEXT: BB2_3: ; %Flow
|
||||
; GCN-NEXT: .LBB2_3: ; %Flow
|
||||
; GCN-NEXT: ; in Loop: Header=BB2_1 Depth=1
|
||||
; GCN-NEXT: s_add_i32 s6, s6, 1
|
||||
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
|
||||
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
|
||||
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
|
||||
; GCN-NEXT: s_cbranch_execnz BB2_1
|
||||
; GCN-NEXT: s_cbranch_execnz .LBB2_1
|
||||
; GCN-NEXT: ; %bb.4: ; %bb9
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 7
|
||||
|
@ -296,11 +296,11 @@ define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
|
|||
; GCN-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
|
||||
; GCN-NEXT: ; implicit-def: $sgpr6
|
||||
; GCN-NEXT: BB3_1: ; %bb1
|
||||
; GCN-NEXT: .LBB3_1: ; %bb1
|
||||
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
|
||||
; GCN-NEXT: s_cmp_gt_i32 s6, -1
|
||||
; GCN-NEXT: s_cbranch_scc1 BB3_3
|
||||
; GCN-NEXT: s_cbranch_scc1 .LBB3_3
|
||||
; GCN-NEXT: ; %bb.2: ; %bb4
|
||||
; GCN-NEXT: ; in Loop: Header=BB3_1 Depth=1
|
||||
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
|
||||
|
@ -309,13 +309,13 @@ define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
|
|||
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
|
||||
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
|
||||
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
|
||||
; GCN-NEXT: BB3_3: ; %Flow
|
||||
; GCN-NEXT: .LBB3_3: ; %Flow
|
||||
; GCN-NEXT: ; in Loop: Header=BB3_1 Depth=1
|
||||
; GCN-NEXT: s_add_i32 s6, s6, 1
|
||||
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
|
||||
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
|
||||
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
|
||||
; GCN-NEXT: s_cbranch_execnz BB3_1
|
||||
; GCN-NEXT: s_cbranch_execnz .LBB3_1
|
||||
; GCN-NEXT: ; %bb.4: ; %bb9
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 7
|
||||
|
@ -384,11 +384,11 @@ define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
|
|||
; GCN-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
|
||||
; GCN-NEXT: ; implicit-def: $sgpr6
|
||||
; GCN-NEXT: BB4_1: ; %bb1
|
||||
; GCN-NEXT: .LBB4_1: ; %bb1
|
||||
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
|
||||
; GCN-NEXT: s_cmp_gt_i32 s6, -1
|
||||
; GCN-NEXT: s_cbranch_scc1 BB4_3
|
||||
; GCN-NEXT: s_cbranch_scc1 .LBB4_3
|
||||
; GCN-NEXT: ; %bb.2: ; %bb4
|
||||
; GCN-NEXT: ; in Loop: Header=BB4_1 Depth=1
|
||||
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
|
||||
|
@ -397,13 +397,13 @@ define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
|
|||
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
|
||||
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
|
||||
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
|
||||
; GCN-NEXT: BB4_3: ; %Flow
|
||||
; GCN-NEXT: .LBB4_3: ; %Flow
|
||||
; GCN-NEXT: ; in Loop: Header=BB4_1 Depth=1
|
||||
; GCN-NEXT: s_add_i32 s6, s6, 1
|
||||
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
|
||||
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
|
||||
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
|
||||
; GCN-NEXT: s_cbranch_execnz BB4_1
|
||||
; GCN-NEXT: s_cbranch_execnz .LBB4_1
|
||||
; GCN-NEXT: ; %bb.4: ; %bb9
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 7
|
||||
|
@ -476,11 +476,11 @@ define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
|
|||
; GCN-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
|
||||
; GCN-NEXT: ; implicit-def: $sgpr6
|
||||
; GCN-NEXT: BB5_1: ; %bb1
|
||||
; GCN-NEXT: .LBB5_1: ; %bb1
|
||||
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
|
||||
; GCN-NEXT: s_cmp_gt_i32 s6, -1
|
||||
; GCN-NEXT: s_cbranch_scc1 BB5_3
|
||||
; GCN-NEXT: s_cbranch_scc1 .LBB5_3
|
||||
; GCN-NEXT: ; %bb.2: ; %bb4
|
||||
; GCN-NEXT: ; in Loop: Header=BB5_1 Depth=1
|
||||
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
|
||||
|
@ -489,14 +489,14 @@ define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
|
|||
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
|
||||
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
|
||||
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
|
||||
; GCN-NEXT: BB5_3: ; %Flow
|
||||
; GCN-NEXT: .LBB5_3: ; %Flow
|
||||
; GCN-NEXT: ; in Loop: Header=BB5_1 Depth=1
|
||||
; GCN-NEXT: s_xor_b64 s[8:9], s[4:5], -1
|
||||
; GCN-NEXT: s_add_i32 s6, s6, 1
|
||||
; GCN-NEXT: s_and_b64 s[8:9], exec, s[8:9]
|
||||
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
|
||||
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
|
||||
; GCN-NEXT: s_cbranch_execnz BB5_1
|
||||
; GCN-NEXT: s_cbranch_execnz .LBB5_1
|
||||
; GCN-NEXT: ; %bb.4: ; %bb9
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 7
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
|
||||
; Check that we do not copy agprs to vgprs and back inside the loop.
|
||||
|
||||
; GCN: [[LOOP:BB[0-9_]+]]:
|
||||
; GCN: [[LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GFX908_A: v_mfma_f32_32x32x1f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
|
@ -50,7 +50,7 @@ exit:
|
|||
; GFX90A: v_accvgpr_write_b32 [[LEAD:a[0-9]+]], [[TMP]]
|
||||
; GFX90A-COUNT-31: v_accvgpr_mov_b32 a{{[0-9]+}}, [[LEAD]]
|
||||
|
||||
; GCN: [[LOOP:BB[0-9_]+]]:
|
||||
; GCN: [[LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GFX908_A: v_mfma_f32_32x32x1f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
|
@ -85,7 +85,7 @@ exit:
|
|||
; GFX908-COUNT-30: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}}
|
||||
; GFX90A-COUNT-30: v_accvgpr_mov_b32 a{{[0-9]+}}, [[LEAD]]{{$}}
|
||||
|
||||
; GCN: [[LOOP:BB[0-9_]+]]:
|
||||
; GCN: [[LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GFX908_A: v_mfma_f32_32x32x1f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
|
@ -249,7 +249,7 @@ exit:
|
|||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
|
||||
|
||||
; GCN: [[LOOP:BB[0-9_]+]]:
|
||||
; GCN: [[LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GFX908_A: v_mfma_f32_32x32x1f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
|
@ -281,7 +281,7 @@ exit:
|
|||
|
||||
; GCN-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, v0{{$}}
|
||||
|
||||
; GCN: [[LOOP:BB[0-9_]+]]:
|
||||
; GCN: [[LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GFX908_A: v_mfma_f32_32x32x1f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
|
@ -351,7 +351,7 @@ exit:
|
|||
; GFX90A: v_accvgpr_write_b32 [[LEAD:a[0-9]+]], [[TMP]]
|
||||
; GFX90A-COUNT-31: v_accvgpr_mov_b32 a{{[0-9]+}}, [[LEAD]]
|
||||
|
||||
; GCN: [[LOOP:BB[0-9_]+]]:
|
||||
; GCN: [[LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GFX908_A: v_mfma_f32_32x32x1f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
|
@ -451,7 +451,7 @@ exit:
|
|||
; GFX90A-DAG: v_accvgpr_write_b32 [[LEAD:a[0-9]+]], 0
|
||||
; GFX90A-COUNT-28: v_accvgpr_mov_b32 a{{[0-9]+}}, [[LEAD]]
|
||||
|
||||
; GCN: [[LOOP:BB[0-9_]+]]:
|
||||
; GCN: [[LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GFX908_A: v_mfma_f32_32x32x1f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
|
@ -493,7 +493,7 @@ exit:
|
|||
; GFX90A-NOT: v_accvgpr
|
||||
; GCN-NOT: v_accvgpr
|
||||
|
||||
; GCN: [[LOOP:BB[0-9_]+]]:
|
||||
; GCN: [[LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GFX908_A: v_mfma_f32_32x32x1f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
|
@ -538,7 +538,7 @@ exit:
|
|||
; GFX90A: v_accvgpr_write_b32 [[LEAD:a[0-9]+]], [[TMP]]{{$}}
|
||||
; GFX90A-COUNT-29: v_accvgpr_mov_b32 a{{[0-9]+}}, [[LEAD]]
|
||||
|
||||
; GCN: [[LOOP:BB[0-9_]+]]:
|
||||
; GCN: [[LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GFX908_A: v_mfma_f32_32x32x1f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
|
@ -609,9 +609,9 @@ exit:
|
|||
|
||||
; Check that we do not copy agprs to vgprs and back in an outer loop.
|
||||
|
||||
; GCN: [[OUTER_LOOP:BB[0-9_]+]]:
|
||||
; GCN: [[OUTER_LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GCN: [[INNER_LOOP:BB[0-9_]+]]:
|
||||
; GCN: [[INNER_LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GFX908_A: v_mfma_f32_32x32x1f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
; W64-LABEL: mubuf_vgpr
|
||||
; W64: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec
|
||||
; W64: [[LOOPBB:BB[0-9]+_[0-9]+]]:
|
||||
; W64: [[LOOPBB:.LBB[0-9]+_[0-9]+]]:
|
||||
; W64-DAG: v_readfirstlane_b32 s[[SRSRC0:[0-9]+]], v[[VRSRC0:[0-9]+]]
|
||||
; W64-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]]
|
||||
; W64-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]]
|
||||
|
@ -24,7 +24,7 @@
|
|||
|
||||
; W32-LABEL: mubuf_vgpr
|
||||
; W32: s_mov_b32 [[SAVEEXEC:s[0-9]+]], exec_lo
|
||||
; W32: [[LOOPBB:BB[0-9]+_[0-9]+]]:
|
||||
; W32: [[LOOPBB:.LBB[0-9]+_[0-9]+]]:
|
||||
; W32-DAG: v_readfirstlane_b32 s[[SRSRC0:[0-9]+]], v[[VRSRC0:[0-9]+]]
|
||||
; W32-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]]
|
||||
; W32-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]]
|
||||
|
@ -48,7 +48,7 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
|
|||
; W64-LABEL: mubuf_vgpr_adjacent_in_block
|
||||
|
||||
; W64: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec
|
||||
; W64: [[LOOPBB0:BB[0-9]+_[0-9]+]]:
|
||||
; W64: [[LOOPBB0:.LBB[0-9]+_[0-9]+]]:
|
||||
; W64-DAG: v_readfirstlane_b32 s[[SRSRC0:[0-9]+]], v[[VRSRC0:[0-9]+]]
|
||||
; W64-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]]
|
||||
; W64-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]]
|
||||
|
@ -65,7 +65,7 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
|
|||
; FIXME: redundant s_mov
|
||||
; W64: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec
|
||||
|
||||
; W64: [[LOOPBB1:BB[0-9]+_[0-9]+]]:
|
||||
; W64: [[LOOPBB1:.LBB[0-9]+_[0-9]+]]:
|
||||
; W64-DAG: v_readfirstlane_b32 s[[SRSRC0:[0-9]+]], v[[VRSRC0:[0-9]+]]
|
||||
; W64-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]]
|
||||
; W64-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]]
|
||||
|
@ -86,7 +86,7 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
|
|||
; W32-LABEL: mubuf_vgpr_adjacent_in_block
|
||||
|
||||
; W32: s_mov_b32 [[SAVEEXEC:s[0-9]+]], exec_lo
|
||||
; W32: [[LOOPBB0:BB[0-9]+_[0-9]+]]:
|
||||
; W32: [[LOOPBB0:.LBB[0-9]+_[0-9]+]]:
|
||||
; W32-DAG: v_readfirstlane_b32 s[[SRSRC0:[0-9]+]], v[[VRSRC0:[0-9]+]]
|
||||
; W32-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]]
|
||||
; W32-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]]
|
||||
|
@ -103,7 +103,7 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
|
|||
; FIXME: redundant s_mov
|
||||
; W32: s_mov_b32 [[SAVEEXEC:s[0-9]+]], exec_lo
|
||||
|
||||
; W32: [[LOOPBB1:BB[0-9]+_[0-9]+]]:
|
||||
; W32: [[LOOPBB1:.LBB[0-9]+_[0-9]+]]:
|
||||
; W32-DAG: v_readfirstlane_b32 s[[SRSRC0:[0-9]+]], v[[VRSRC0:[0-9]+]]
|
||||
; W32-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]]
|
||||
; W32-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]]
|
||||
|
@ -135,7 +135,7 @@ entry:
|
|||
; W64-DAG: v_mov_b32_e32 [[IDX:v[0-9]+]], s{{[0-9]+}}
|
||||
; W64-DAG: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec
|
||||
|
||||
; W64: [[LOOPBB0:BB[0-9]+_[0-9]+]]:
|
||||
; W64: [[LOOPBB0:.LBB[0-9]+_[0-9]+]]:
|
||||
; W64-DAG: v_readfirstlane_b32 s[[SRSRC0:[0-9]+]], v[[VRSRC0:[0-9]+]]
|
||||
; W64-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]]
|
||||
; W64-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]]
|
||||
|
@ -149,13 +149,13 @@ entry:
|
|||
; W64: s_cbranch_execnz [[LOOPBB0]]
|
||||
|
||||
; W64: s_mov_b64 exec, [[SAVEEXEC]]
|
||||
; W64: s_cbranch_execz [[TERMBB:BB[0-9]+_[0-9]+]]
|
||||
; W64: s_cbranch_execz [[TERMBB:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; W64: ; %bb.{{[0-9]+}}:
|
||||
; W64-DAG: v_mov_b32_e32 [[IDX:v[0-9]+]], s{{[0-9]+}}
|
||||
; W64-DAG: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec
|
||||
|
||||
; W64: [[LOOPBB1:BB[0-9]+_[0-9]+]]:
|
||||
; W64: [[LOOPBB1:.LBB[0-9]+_[0-9]+]]:
|
||||
; W64-DAG: v_readfirstlane_b32 s[[SRSRC0:[0-9]+]], v[[VRSRC0:[0-9]+]]
|
||||
; W64-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]]
|
||||
; W64-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]]
|
||||
|
@ -179,7 +179,7 @@ entry:
|
|||
; W32-DAG: v_mov_b32_e32 [[IDX:v[0-9]+]], s4
|
||||
; W32-DAG: s_mov_b32 [[SAVEEXEC:s[0-9]+]], exec_lo
|
||||
|
||||
; W32: [[LOOPBB0:BB[0-9]+_[0-9]+]]:
|
||||
; W32: [[LOOPBB0:.LBB[0-9]+_[0-9]+]]:
|
||||
; W32-DAG: v_readfirstlane_b32 s[[SRSRC0:[0-9]+]], v[[VRSRC0:[0-9]+]]
|
||||
; W32-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]]
|
||||
; W32-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]]
|
||||
|
@ -193,13 +193,13 @@ entry:
|
|||
; W32: s_cbranch_execnz [[LOOPBB0]]
|
||||
|
||||
; W32: s_mov_b32 exec_lo, [[SAVEEXEC]]
|
||||
; W32: s_cbranch_execz [[TERMBB:BB[0-9]+_[0-9]+]]
|
||||
; W32: s_cbranch_execz [[TERMBB:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; W32: ; %bb.{{[0-9]+}}:
|
||||
; W32-DAG: v_mov_b32_e32 [[IDX:v[0-9]+]], s4
|
||||
; W32-DAG: s_mov_b32 [[SAVEEXEC:s[0-9]+]], exec_lo
|
||||
|
||||
; W32: [[LOOPBB1:BB[0-9]+_[0-9]+]]:
|
||||
; W32: [[LOOPBB1:.LBB[0-9]+_[0-9]+]]:
|
||||
; W32-DAG: v_readfirstlane_b32 s[[SRSRC0:[0-9]+]], v[[VRSRC0:[0-9]+]]
|
||||
; W32-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]]
|
||||
; W32-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]]
|
||||
|
@ -228,7 +228,7 @@ entry:
|
|||
; W64-O0-DAG: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec
|
||||
; W64-O0-DAG: buffer_store_dword [[IDX_V]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Spill
|
||||
|
||||
; W64-O0: [[LOOPBB0:BB[0-9]+_[0-9]+]]: ; =>This Inner Loop Header: Depth=1
|
||||
; W64-O0: [[LOOPBB0:.LBB[0-9]+_[0-9]+]]: ; =>This Inner Loop Header: Depth=1
|
||||
; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 ; 4-byte Folded Reload
|
||||
; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
|
||||
; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
|
||||
|
@ -259,7 +259,7 @@ entry:
|
|||
; XXX-W64-O0: s_mov_b64 exec, [[SAVEEXEC]]
|
||||
; W64-O0: buffer_load_dword [[RES:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload
|
||||
; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF:[0-9]+]] ; 4-byte Folded Spill
|
||||
; W64-O0: s_cbranch_execz [[TERMBB:BB[0-9]+_[0-9]+]]
|
||||
; W64-O0: s_cbranch_execz [[TERMBB:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; W64-O0: ; %bb.{{[0-9]+}}: ; %bb1
|
||||
; W64-O0-DAG: buffer_store_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill
|
||||
|
@ -267,7 +267,7 @@ entry:
|
|||
; W64-O0: v_writelane_b32 [[VSAVEEXEC:v[0-9]+]], s[[SAVEEXEC0]], [[SAVEEXEC_IDX0:[0-9]+]]
|
||||
; W64-O0: v_writelane_b32 [[VSAVEEXEC]], s[[SAVEEXEC1]], [[SAVEEXEC_IDX1:[0-9]+]]
|
||||
|
||||
; W64-O0: [[LOOPBB1:BB[0-9]+_[0-9]+]]: ; =>This Inner Loop Header: Depth=1
|
||||
; W64-O0: [[LOOPBB1:.LBB[0-9]+_[0-9]+]]: ; =>This Inner Loop Header: Depth=1
|
||||
; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:[[IDX_OFF]] ; 4-byte Folded Reload
|
||||
; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
||||
; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
|
||||
|
|
|
@ -16,7 +16,7 @@ define void @lsr_order_mul24_0(i32 %arg, i32 %arg2, i32 %arg6, i32 %arg13, i32 %
|
|||
; GFX9-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: ds_write_b32 v0, v5
|
||||
; GFX9-NEXT: BB0_1: ; %bb23
|
||||
; GFX9-NEXT: .LBB0_1: ; %bb23
|
||||
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX9-NEXT: v_mul_u32_u24_e32 v5, v0, v2
|
||||
; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
|
||||
|
@ -25,7 +25,7 @@ define void @lsr_order_mul24_0(i32 %arg, i32 %arg2, i32 %arg6, i32 %arg13, i32 %
|
|||
; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v5, v3
|
||||
; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_execnz BB0_1
|
||||
; GFX9-NEXT: s_cbranch_execnz .LBB0_1
|
||||
; GFX9-NEXT: ; %bb.2: ; %.loopexit
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -58,7 +58,7 @@ define void @lsr_order_mul24_1(i32 %arg, i32 %arg1, i32 %arg2, float addrspace(3
|
|||
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v5
|
||||
; GFX9-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v1
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_execz BB1_3
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB1_3
|
||||
; GFX9-NEXT: ; %bb.1: ; %bb19
|
||||
; GFX9-NEXT: v_cvt_f32_u32_e32 v7, v6
|
||||
; GFX9-NEXT: v_add_u32_e32 v4, v4, v0
|
||||
|
@ -70,7 +70,7 @@ define void @lsr_order_mul24_1(i32 %arg, i32 %arg1, i32 %arg2, float addrspace(3
|
|||
; GFX9-NEXT: s_mov_b64 s[10:11], 0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, 0x3727c5ac
|
||||
; GFX9-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX9-NEXT: BB1_2: ; %bb23
|
||||
; GFX9-NEXT: .LBB1_2: ; %bb23
|
||||
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v0
|
||||
; GFX9-NEXT: v_add_u32_e32 v12, v17, v0
|
||||
|
@ -100,8 +100,8 @@ define void @lsr_order_mul24_1(i32 %arg, i32 %arg1, i32 %arg2, float addrspace(3
|
|||
; GFX9-NEXT: ds_write_b32 v6, v3
|
||||
; GFX9-NEXT: v_add_u32_e32 v6, v6, v8
|
||||
; GFX9-NEXT: s_andn2_b64 exec, exec, s[10:11]
|
||||
; GFX9-NEXT: s_cbranch_execnz BB1_2
|
||||
; GFX9-NEXT: BB1_3: ; %Flow3
|
||||
; GFX9-NEXT: s_cbranch_execnz .LBB1_2
|
||||
; GFX9-NEXT: .LBB1_3: ; %Flow3
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
|
|
|
@ -708,10 +708,10 @@ define amdgpu_kernel void @simplify_i24_crash(<2 x i32> addrspace(1)* %out, i32
|
|||
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_cmp_lg_u32 s2, 0
|
||||
; SI-NEXT: s_cbranch_scc0 BB8_2
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB8_2
|
||||
; SI-NEXT: ; %bb.1: ; %bb7
|
||||
; SI-NEXT: s_endpgm
|
||||
; SI-NEXT: BB8_2: ; %bb11
|
||||
; SI-NEXT: .LBB8_2: ; %bb11
|
||||
; SI-NEXT: s_load_dword s2, s[0:1], 0xd
|
||||
; SI-NEXT: s_load_dword s4, s[0:1], 0xf
|
||||
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
|
||||
|
@ -731,10 +731,10 @@ define amdgpu_kernel void @simplify_i24_crash(<2 x i32> addrspace(1)* %out, i32
|
|||
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
|
||||
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; VI-NEXT: s_cmp_lg_u32 s2, 0
|
||||
; VI-NEXT: s_cbranch_scc0 BB8_2
|
||||
; VI-NEXT: s_cbranch_scc0 .LBB8_2
|
||||
; VI-NEXT: ; %bb.1: ; %bb7
|
||||
; VI-NEXT: s_endpgm
|
||||
; VI-NEXT: BB8_2: ; %bb11
|
||||
; VI-NEXT: .LBB8_2: ; %bb11
|
||||
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
|
||||
; VI-NEXT: s_load_dword s2, s[0:1], 0x34
|
||||
; VI-NEXT: s_load_dword s0, s[0:1], 0x3c
|
||||
|
@ -754,10 +754,10 @@ define amdgpu_kernel void @simplify_i24_crash(<2 x i32> addrspace(1)* %out, i32
|
|||
; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_cmp_lg_u32 s2, 0
|
||||
; GFX9-NEXT: s_cbranch_scc0 BB8_2
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB8_2
|
||||
; GFX9-NEXT: ; %bb.1: ; %bb7
|
||||
; GFX9-NEXT: s_endpgm
|
||||
; GFX9-NEXT: BB8_2: ; %bb11
|
||||
; GFX9-NEXT: .LBB8_2: ; %bb11
|
||||
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
|
||||
; GFX9-NEXT: s_load_dword s2, s[0:1], 0x34
|
||||
; GFX9-NEXT: s_load_dword s3, s[0:1], 0x3c
|
||||
|
|
|
@ -362,7 +362,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
|
|||
|
||||
; GCN-LABEL: {{^}}uniform_branch_to_multi_divergent_region_exit_ret_ret_return_value:
|
||||
; GCN: s_cmp_gt_i32 s0, 1
|
||||
; GCN: s_cbranch_scc0 [[FLOW:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_scc0 [[FLOW:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: v_cmp_ne_u32_e32 vcc, 7, v0
|
||||
|
||||
|
@ -373,7 +373,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
|
|||
; GCN-NOT: s_and_b64 exec, exec
|
||||
; GCN: v_mov_b32_e32 v0, 1.0
|
||||
|
||||
; GCN: {{^BB[0-9]+_[0-9]+}}: ; %UnifiedReturnBlock
|
||||
; GCN: {{^.LBB[0-9]+_[0-9]+}}: ; %UnifiedReturnBlock
|
||||
; GCN-NEXT: s_or_b64 exec, exec
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ; return
|
||||
|
|
|
@ -47,36 +47,36 @@ define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) {
|
|||
; GCN: ; %bb.0: ; %main_body
|
||||
; GCN-NEXT: s_mov_b64 s[0:1], 0
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: s_branch BB0_2
|
||||
; GCN-NEXT: BB0_1: ; %loop.exit.guard
|
||||
; GCN-NEXT: s_branch .LBB0_2
|
||||
; GCN-NEXT: .LBB0_1: ; %loop.exit.guard
|
||||
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN-NEXT: s_and_b64 s[2:3], exec, s[2:3]
|
||||
; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
|
||||
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
|
||||
; GCN-NEXT: s_cbranch_execz BB0_6
|
||||
; GCN-NEXT: BB0_2: ; %LOOP.outer
|
||||
; GCN-NEXT: s_cbranch_execz .LBB0_6
|
||||
; GCN-NEXT: .LBB0_2: ; %LOOP.outer
|
||||
; GCN-NEXT: ; =>This Loop Header: Depth=1
|
||||
; GCN-NEXT: ; Child Loop BB0_4 Depth 2
|
||||
; GCN-NEXT: ; implicit-def: $sgpr6_sgpr7
|
||||
; GCN-NEXT: ; implicit-def: $sgpr2_sgpr3
|
||||
; GCN-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GCN-NEXT: s_branch BB0_4
|
||||
; GCN-NEXT: BB0_3: ; %Flow
|
||||
; GCN-NEXT: s_branch .LBB0_4
|
||||
; GCN-NEXT: .LBB0_3: ; %Flow
|
||||
; GCN-NEXT: ; in Loop: Header=BB0_4 Depth=2
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GCN-NEXT: s_and_b64 s[8:9], exec, s[6:7]
|
||||
; GCN-NEXT: s_or_b64 s[4:5], s[8:9], s[4:5]
|
||||
; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; GCN-NEXT: s_cbranch_execz BB0_1
|
||||
; GCN-NEXT: BB0_4: ; %LOOP
|
||||
; GCN-NEXT: s_cbranch_execz .LBB0_1
|
||||
; GCN-NEXT: .LBB0_4: ; %LOOP
|
||||
; GCN-NEXT: ; Parent Loop BB0_2 Depth=1
|
||||
; GCN-NEXT: ; => This Inner Loop Header: Depth=2
|
||||
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, v0, v4
|
||||
; GCN-NEXT: s_or_b64 s[2:3], s[2:3], exec
|
||||
; GCN-NEXT: s_or_b64 s[6:7], s[6:7], exec
|
||||
; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
|
||||
; GCN-NEXT: s_cbranch_execz BB0_3
|
||||
; GCN-NEXT: s_cbranch_execz .LBB0_3
|
||||
; GCN-NEXT: ; %bb.5: ; %ENDIF
|
||||
; GCN-NEXT: ; in Loop: Header=BB0_4 Depth=2
|
||||
; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v0
|
||||
|
@ -85,8 +85,8 @@ define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) {
|
|||
; GCN-NEXT: s_andn2_b64 s[6:7], s[6:7], exec
|
||||
; GCN-NEXT: s_and_b64 s[10:11], vcc, exec
|
||||
; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11]
|
||||
; GCN-NEXT: s_branch BB0_3
|
||||
; GCN-NEXT: BB0_6: ; %IF
|
||||
; GCN-NEXT: s_branch .LBB0_3
|
||||
; GCN-NEXT: .LBB0_6: ; %IF
|
||||
; GCN-NEXT: s_endpgm
|
||||
main_body:
|
||||
br label %LOOP.outer
|
||||
|
@ -182,8 +182,8 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
|
|||
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0
|
||||
; GCN-NEXT: s_mov_b32 s2, -1
|
||||
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
|
||||
; GCN-NEXT: s_branch BB1_2
|
||||
; GCN-NEXT: BB1_1: ; %Flow4
|
||||
; GCN-NEXT: s_branch .LBB1_2
|
||||
; GCN-NEXT: .LBB1_1: ; %Flow4
|
||||
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
|
||||
; GCN-NEXT: s_and_b64 s[6:7], exec, s[6:7]
|
||||
; GCN-NEXT: s_or_b64 s[0:1], s[6:7], s[0:1]
|
||||
|
@ -191,8 +191,8 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
|
|||
; GCN-NEXT: s_and_b64 s[6:7], s[8:9], exec
|
||||
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
|
||||
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
|
||||
; GCN-NEXT: s_cbranch_execz BB1_9
|
||||
; GCN-NEXT: BB1_2: ; %bb1
|
||||
; GCN-NEXT: s_cbranch_execz .LBB1_9
|
||||
; GCN-NEXT: .LBB1_2: ; %bb1
|
||||
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -201,14 +201,14 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
|
|||
; GCN-NEXT: s_and_b64 vcc, exec, vcc
|
||||
; GCN-NEXT: ; implicit-def: $sgpr8_sgpr9
|
||||
; GCN-NEXT: s_mov_b64 s[10:11], -1
|
||||
; GCN-NEXT: s_cbranch_vccnz BB1_6
|
||||
; GCN-NEXT: s_cbranch_vccnz .LBB1_6
|
||||
; GCN-NEXT: ; %bb.3: ; %LeafBlock1
|
||||
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
|
||||
; GCN-NEXT: s_mov_b64 s[6:7], -1
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
|
||||
; GCN-NEXT: s_and_b64 vcc, exec, vcc
|
||||
; GCN-NEXT: s_mov_b64 s[8:9], -1
|
||||
; GCN-NEXT: s_cbranch_vccz BB1_5
|
||||
; GCN-NEXT: s_cbranch_vccz .LBB1_5
|
||||
; GCN-NEXT: ; %bb.4: ; %case1
|
||||
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
|
||||
; GCN-NEXT: buffer_load_dword v2, off, s[0:3], 0 glc
|
||||
|
@ -216,19 +216,19 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
|
|||
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v2
|
||||
; GCN-NEXT: s_mov_b64 s[8:9], 0
|
||||
; GCN-NEXT: s_orn2_b64 s[6:7], vcc, exec
|
||||
; GCN-NEXT: BB1_5: ; %Flow3
|
||||
; GCN-NEXT: .LBB1_5: ; %Flow3
|
||||
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
|
||||
; GCN-NEXT: s_mov_b64 s[10:11], 0
|
||||
; GCN-NEXT: BB1_6: ; %Flow
|
||||
; GCN-NEXT: .LBB1_6: ; %Flow
|
||||
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
|
||||
; GCN-NEXT: s_and_b64 vcc, exec, s[10:11]
|
||||
; GCN-NEXT: s_cbranch_vccz BB1_1
|
||||
; GCN-NEXT: s_cbranch_vccz .LBB1_1
|
||||
; GCN-NEXT: ; %bb.7: ; %LeafBlock
|
||||
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
|
||||
; GCN-NEXT: s_and_b64 vcc, exec, vcc
|
||||
; GCN-NEXT: s_mov_b64 s[8:9], -1
|
||||
; GCN-NEXT: s_cbranch_vccz BB1_1
|
||||
; GCN-NEXT: s_cbranch_vccz .LBB1_1
|
||||
; GCN-NEXT: ; %bb.8: ; %case0
|
||||
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
|
||||
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
|
||||
|
@ -238,8 +238,8 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
|
|||
; GCN-NEXT: s_andn2_b64 s[6:7], s[6:7], exec
|
||||
; GCN-NEXT: s_and_b64 s[10:11], vcc, exec
|
||||
; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11]
|
||||
; GCN-NEXT: s_branch BB1_1
|
||||
; GCN-NEXT: BB1_9: ; %loop.exit.guard
|
||||
; GCN-NEXT: s_branch .LBB1_1
|
||||
; GCN-NEXT: .LBB1_9: ; %loop.exit.guard
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GCN-NEXT: s_and_saveexec_b64 s[0:1], s[4:5]
|
||||
; GCN-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
|
||||
|
|
|
@ -19,32 +19,32 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(i64 addrspace(3)* noca
|
|||
; GCN-NEXT: v_add_i32_e32 v0, vcc, s0, v0
|
||||
; GCN-NEXT: ds_read_b64 v[0:1], v0
|
||||
; GCN-NEXT: s_and_b64 s[0:1], exec, -1
|
||||
; GCN-NEXT: s_branch BB0_2
|
||||
; GCN-NEXT: BB0_1: ; %bb10
|
||||
; GCN-NEXT: s_branch .LBB0_2
|
||||
; GCN-NEXT: .LBB0_1: ; %bb10
|
||||
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
|
||||
; GCN-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GCN-NEXT: s_andn2_b64 vcc, exec, s[2:3]
|
||||
; GCN-NEXT: s_cbranch_vccz BB0_4
|
||||
; GCN-NEXT: BB0_2: ; %bb5
|
||||
; GCN-NEXT: s_cbranch_vccz .LBB0_4
|
||||
; GCN-NEXT: .LBB0_2: ; %bb5
|
||||
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: s_mov_b64 vcc, s[0:1]
|
||||
; GCN-NEXT: s_cbranch_vccnz BB0_1
|
||||
; GCN-NEXT: s_cbranch_vccnz .LBB0_1
|
||||
; GCN-NEXT: ; %bb.3: ; in Loop: Header=BB0_2 Depth=1
|
||||
; GCN-NEXT: s_mov_b64 s[4:5], -1
|
||||
; GCN-NEXT: s_andn2_b64 vcc, exec, s[2:3]
|
||||
; GCN-NEXT: s_cbranch_vccnz BB0_2
|
||||
; GCN-NEXT: BB0_4: ; %loop.exit.guard
|
||||
; GCN-NEXT: s_cbranch_vccnz .LBB0_2
|
||||
; GCN-NEXT: .LBB0_4: ; %loop.exit.guard
|
||||
; GCN-NEXT: s_and_b64 vcc, exec, s[4:5]
|
||||
; GCN-NEXT: s_cbranch_vccz BB0_7
|
||||
; GCN-NEXT: s_cbranch_vccz .LBB0_7
|
||||
; GCN-NEXT: ; %bb.5: ; %bb8
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: ds_read_b32 v0, v0
|
||||
; GCN-NEXT: s_and_b64 vcc, exec, 0
|
||||
; GCN-NEXT: BB0_6: ; %bb9
|
||||
; GCN-NEXT: .LBB0_6: ; %bb9
|
||||
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-NEXT: s_mov_b64 vcc, vcc
|
||||
; GCN-NEXT: s_cbranch_vccz BB0_6
|
||||
; GCN-NEXT: BB0_7: ; %DummyReturnBlock
|
||||
; GCN-NEXT: s_cbranch_vccz .LBB0_6
|
||||
; GCN-NEXT: .LBB0_7: ; %DummyReturnBlock
|
||||
; GCN-NEXT: s_endpgm
|
||||
; IR-LABEL: @reduced_nested_loop_conditions(
|
||||
; IR-NEXT: bb:
|
||||
|
@ -152,29 +152,29 @@ define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %a
|
|||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 8, v0
|
||||
; GCN-NEXT: s_and_b64 vcc, exec, vcc
|
||||
; GCN-NEXT: s_cbranch_vccnz BB1_6
|
||||
; GCN-NEXT: s_cbranch_vccnz .LBB1_6
|
||||
; GCN-NEXT: ; %bb.1: ; %bb14.lr.ph
|
||||
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0
|
||||
; GCN-NEXT: s_branch BB1_3
|
||||
; GCN-NEXT: BB1_2: ; in Loop: Header=BB1_3 Depth=1
|
||||
; GCN-NEXT: s_branch .LBB1_3
|
||||
; GCN-NEXT: .LBB1_2: ; in Loop: Header=BB1_3 Depth=1
|
||||
; GCN-NEXT: s_mov_b64 s[0:1], -1
|
||||
; GCN-NEXT: ; implicit-def: $vgpr0
|
||||
; GCN-NEXT: s_cbranch_execnz BB1_6
|
||||
; GCN-NEXT: BB1_3: ; %bb14
|
||||
; GCN-NEXT: s_cbranch_execnz .LBB1_6
|
||||
; GCN-NEXT: .LBB1_3: ; %bb14
|
||||
; GCN-NEXT: ; =>This Loop Header: Depth=1
|
||||
; GCN-NEXT: ; Child Loop BB1_4 Depth 2
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
|
||||
; GCN-NEXT: s_and_b64 vcc, exec, vcc
|
||||
; GCN-NEXT: s_cbranch_vccnz BB1_2
|
||||
; GCN-NEXT: BB1_4: ; %bb18
|
||||
; GCN-NEXT: s_cbranch_vccnz .LBB1_2
|
||||
; GCN-NEXT: .LBB1_4: ; %bb18
|
||||
; GCN-NEXT: ; Parent Loop BB1_3 Depth=1
|
||||
; GCN-NEXT: ; => This Inner Loop Header: Depth=2
|
||||
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 8, v0
|
||||
; GCN-NEXT: s_and_b64 vcc, exec, vcc
|
||||
; GCN-NEXT: s_cbranch_vccnz BB1_4
|
||||
; GCN-NEXT: s_cbranch_vccnz .LBB1_4
|
||||
; GCN-NEXT: ; %bb.5: ; %bb21
|
||||
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1
|
||||
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0
|
||||
|
@ -182,8 +182,8 @@ define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %a
|
|||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_cmp_lt_i32_e64 s[0:1], 8, v1
|
||||
; GCN-NEXT: s_and_b64 vcc, exec, s[0:1]
|
||||
; GCN-NEXT: s_cbranch_vccz BB1_3
|
||||
; GCN-NEXT: BB1_6: ; %bb31
|
||||
; GCN-NEXT: s_cbranch_vccz .LBB1_3
|
||||
; GCN-NEXT: .LBB1_6: ; %bb31
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
|
|
|
@ -22,10 +22,10 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
|
|||
; MUBUF-NEXT: s_mov_b32 s33, 0
|
||||
; MUBUF-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; MUBUF-NEXT: s_cmp_lg_u32 s8, 0
|
||||
; MUBUF-NEXT: s_cbranch_scc1 BB0_3
|
||||
; MUBUF-NEXT: s_cbranch_scc1 .LBB0_3
|
||||
; MUBUF-NEXT: ; %bb.1: ; %bb.0
|
||||
; MUBUF-NEXT: s_cmp_lg_u32 s9, 0
|
||||
; MUBUF-NEXT: s_cbranch_scc1 BB0_3
|
||||
; MUBUF-NEXT: s_cbranch_scc1 .LBB0_3
|
||||
; MUBUF-NEXT: ; %bb.2: ; %bb.1
|
||||
; MUBUF-NEXT: s_add_i32 s6, s32, 0x1000
|
||||
; MUBUF-NEXT: s_lshl_b32 s7, s10, 2
|
||||
|
@ -43,7 +43,7 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
|
|||
; MUBUF-NEXT: v_add_u32_e32 v0, v2, v0
|
||||
; MUBUF-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; MUBUF-NEXT: global_store_dword v1, v0, s[4:5]
|
||||
; MUBUF-NEXT: BB0_3: ; %bb.2
|
||||
; MUBUF-NEXT: .LBB0_3: ; %bb.2
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v0, 0
|
||||
; MUBUF-NEXT: global_store_dword v[0:1], v0, off
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -58,10 +58,10 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
|
|||
; FLATSCR-NEXT: s_mov_b32 s33, 0
|
||||
; FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; FLATSCR-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; FLATSCR-NEXT: s_cbranch_scc1 BB0_3
|
||||
; FLATSCR-NEXT: s_cbranch_scc1 .LBB0_3
|
||||
; FLATSCR-NEXT: ; %bb.1: ; %bb.0
|
||||
; FLATSCR-NEXT: s_cmp_lg_u32 s5, 0
|
||||
; FLATSCR-NEXT: s_cbranch_scc1 BB0_3
|
||||
; FLATSCR-NEXT: s_cbranch_scc1 .LBB0_3
|
||||
; FLATSCR-NEXT: ; %bb.2: ; %bb.1
|
||||
; FLATSCR-NEXT: v_mov_b32_e32 v1, 0
|
||||
; FLATSCR-NEXT: s_add_i32 s2, s32, 0x1000
|
||||
|
@ -76,7 +76,7 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
|
|||
; FLATSCR-NEXT: v_add_u32_e32 v0, v2, v0
|
||||
; FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; FLATSCR-NEXT: global_store_dword v1, v0, s[0:1]
|
||||
; FLATSCR-NEXT: BB0_3: ; %bb.2
|
||||
; FLATSCR-NEXT: .LBB0_3: ; %bb.2
|
||||
; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
|
||||
; FLATSCR-NEXT: global_store_dword v[0:1], v0, off
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -124,7 +124,7 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
|
|||
; MUBUF-NEXT: s_mov_b32 s33, 0
|
||||
; MUBUF-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; MUBUF-NEXT: s_cmp_lg_u32 s6, 0
|
||||
; MUBUF-NEXT: s_cbranch_scc1 BB1_2
|
||||
; MUBUF-NEXT: s_cbranch_scc1 .LBB1_2
|
||||
; MUBUF-NEXT: ; %bb.1: ; %bb.0
|
||||
; MUBUF-NEXT: s_add_i32 s6, s32, 0x1000
|
||||
; MUBUF-NEXT: s_and_b32 s6, s6, 0xfffff000
|
||||
|
@ -143,7 +143,7 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
|
|||
; MUBUF-NEXT: v_add_u32_e32 v0, v2, v0
|
||||
; MUBUF-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; MUBUF-NEXT: global_store_dword v1, v0, s[4:5]
|
||||
; MUBUF-NEXT: BB1_2: ; %bb.1
|
||||
; MUBUF-NEXT: .LBB1_2: ; %bb.1
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v0, 0
|
||||
; MUBUF-NEXT: global_store_dword v[0:1], v0, off
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -158,7 +158,7 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
|
|||
; FLATSCR-NEXT: s_mov_b32 s33, 0
|
||||
; FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; FLATSCR-NEXT: s_cmp_lg_u32 s2, 0
|
||||
; FLATSCR-NEXT: s_cbranch_scc1 BB1_2
|
||||
; FLATSCR-NEXT: s_cbranch_scc1 .LBB1_2
|
||||
; FLATSCR-NEXT: ; %bb.1: ; %bb.0
|
||||
; FLATSCR-NEXT: s_add_i32 s2, s32, 0x1000
|
||||
; FLATSCR-NEXT: v_mov_b32_e32 v1, 0
|
||||
|
@ -174,7 +174,7 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
|
|||
; FLATSCR-NEXT: v_add_u32_e32 v0, v2, v0
|
||||
; FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; FLATSCR-NEXT: global_store_dword v1, v0, s[0:1]
|
||||
; FLATSCR-NEXT: BB1_2: ; %bb.1
|
||||
; FLATSCR-NEXT: .LBB1_2: ; %bb.1
|
||||
; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
|
||||
; FLATSCR-NEXT: global_store_dword v[0:1], v0, off
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -217,11 +217,11 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
|
|||
; MUBUF-NEXT: s_mov_b32 s33, s32
|
||||
; MUBUF-NEXT: s_addk_i32 s32, 0x400
|
||||
; MUBUF-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; MUBUF-NEXT: s_cbranch_execz BB2_3
|
||||
; MUBUF-NEXT: s_cbranch_execz .LBB2_3
|
||||
; MUBUF-NEXT: ; %bb.1: ; %bb.0
|
||||
; MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
|
||||
; MUBUF-NEXT: s_and_b64 exec, exec, vcc
|
||||
; MUBUF-NEXT: s_cbranch_execz BB2_3
|
||||
; MUBUF-NEXT: s_cbranch_execz .LBB2_3
|
||||
; MUBUF-NEXT: ; %bb.2: ; %bb.1
|
||||
; MUBUF-NEXT: s_add_i32 s6, s32, 0x1000
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v2, 0
|
||||
|
@ -236,7 +236,7 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
|
|||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: v_add_u32_e32 v2, v2, v3
|
||||
; MUBUF-NEXT: global_store_dword v[0:1], v2, off
|
||||
; MUBUF-NEXT: BB2_3: ; %bb.2
|
||||
; MUBUF-NEXT: .LBB2_3: ; %bb.2
|
||||
; MUBUF-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v0, 0
|
||||
; MUBUF-NEXT: global_store_dword v[0:1], v0, off
|
||||
|
@ -253,11 +253,11 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
|
|||
; FLATSCR-NEXT: s_mov_b32 s33, s32
|
||||
; FLATSCR-NEXT: s_add_i32 s32, s32, 16
|
||||
; FLATSCR-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; FLATSCR-NEXT: s_cbranch_execz BB2_3
|
||||
; FLATSCR-NEXT: s_cbranch_execz .LBB2_3
|
||||
; FLATSCR-NEXT: ; %bb.1: ; %bb.0
|
||||
; FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
|
||||
; FLATSCR-NEXT: s_and_b64 exec, exec, vcc
|
||||
; FLATSCR-NEXT: s_cbranch_execz BB2_3
|
||||
; FLATSCR-NEXT: s_cbranch_execz .LBB2_3
|
||||
; FLATSCR-NEXT: ; %bb.2: ; %bb.1
|
||||
; FLATSCR-NEXT: s_add_i32 s2, s32, 0x1000
|
||||
; FLATSCR-NEXT: v_mov_b32_e32 v2, 0
|
||||
|
@ -270,7 +270,7 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
|
|||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: v_add_u32_e32 v2, v2, v3
|
||||
; FLATSCR-NEXT: global_store_dword v[0:1], v2, off
|
||||
; FLATSCR-NEXT: BB2_3: ; %bb.2
|
||||
; FLATSCR-NEXT: .LBB2_3: ; %bb.2
|
||||
; FLATSCR-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
|
||||
; FLATSCR-NEXT: global_store_dword v[0:1], v0, off
|
||||
|
@ -316,7 +316,7 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
|
|||
; MUBUF-NEXT: s_and_b32 s33, s33, 0xfffff000
|
||||
; MUBUF-NEXT: s_addk_i32 s32, 0x2000
|
||||
; MUBUF-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; MUBUF-NEXT: s_cbranch_execz BB3_2
|
||||
; MUBUF-NEXT: s_cbranch_execz .LBB3_2
|
||||
; MUBUF-NEXT: ; %bb.1: ; %bb.0
|
||||
; MUBUF-NEXT: s_add_i32 s6, s32, 0x1000
|
||||
; MUBUF-NEXT: s_and_b32 s6, s6, 0xfffff000
|
||||
|
@ -332,7 +332,7 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
|
|||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: v_add_u32_e32 v2, v2, v3
|
||||
; MUBUF-NEXT: global_store_dword v[0:1], v2, off
|
||||
; MUBUF-NEXT: BB3_2: ; %bb.1
|
||||
; MUBUF-NEXT: .LBB3_2: ; %bb.1
|
||||
; MUBUF-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v0, 0
|
||||
; MUBUF-NEXT: global_store_dword v[0:1], v0, off
|
||||
|
@ -350,7 +350,7 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
|
|||
; FLATSCR-NEXT: s_andn2_b32 s33, s33, 63
|
||||
; FLATSCR-NEXT: s_addk_i32 s32, 0x80
|
||||
; FLATSCR-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; FLATSCR-NEXT: s_cbranch_execz BB3_2
|
||||
; FLATSCR-NEXT: s_cbranch_execz .LBB3_2
|
||||
; FLATSCR-NEXT: ; %bb.1: ; %bb.0
|
||||
; FLATSCR-NEXT: s_add_i32 s2, s32, 0x1000
|
||||
; FLATSCR-NEXT: s_and_b32 s2, s2, 0xfffff000
|
||||
|
@ -364,7 +364,7 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
|
|||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: v_add_u32_e32 v2, v2, v3
|
||||
; FLATSCR-NEXT: global_store_dword v[0:1], v2, off
|
||||
; FLATSCR-NEXT: BB3_2: ; %bb.1
|
||||
; FLATSCR-NEXT: .LBB3_2: ; %bb.1
|
||||
; FLATSCR-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
|
||||
; FLATSCR-NEXT: global_store_dword v[0:1], v0, off
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}negated_cond:
|
||||
; GCN: BB0_1:
|
||||
; GCN: .LBB0_1:
|
||||
; GCN: v_cmp_eq_u32_e64 [[CC:[^,]+]],
|
||||
; GCN: BB0_3:
|
||||
; GCN: .LBB0_3:
|
||||
; GCN-NOT: v_cndmask_b32
|
||||
; GCN-NOT: v_cmp
|
||||
; GCN: s_andn2_b64 vcc, exec, [[CC]]
|
||||
; GCN: s_cbranch_vccnz BB0_2
|
||||
; GCN: s_cbranch_vccnz .LBB0_2
|
||||
define amdgpu_kernel void @negated_cond(i32 addrspace(1)* %arg1) {
|
||||
bb:
|
||||
br label %bb1
|
||||
|
@ -37,14 +37,14 @@ bb4:
|
|||
; GCN-LABEL: {{^}}negated_cond_dominated_blocks:
|
||||
; GCN: s_cmp_lg_u32
|
||||
; GCN: s_cselect_b64 [[CC1:[^,]+]], -1, 0
|
||||
; GCN: s_branch [[BB1:BB[0-9]+_[0-9]+]]
|
||||
; GCN: [[BB0:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_branch [[BB1:.LBB[0-9]+_[0-9]+]]
|
||||
; GCN: [[BB0:.LBB[0-9]+_[0-9]+]]
|
||||
; GCN-NOT: v_cndmask_b32
|
||||
; GCN-NOT: v_cmp
|
||||
; GCN: [[BB1]]:
|
||||
; GCN: s_mov_b64 [[CC2:[^,]+]], -1
|
||||
; GCN: s_mov_b64 vcc, [[CC1]]
|
||||
; GCN: s_cbranch_vccz [[BB2:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_vccz [[BB2:.LBB[0-9]+_[0-9]+]]
|
||||
; GCN: s_mov_b64 [[CC2]], 0
|
||||
; GCN: [[BB2]]:
|
||||
; GCN: s_andn2_b64 vcc, exec, [[CC2]]
|
||||
|
|
|
@ -202,7 +202,7 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out,
|
|||
; GCN-NEXT: s_mov_b32 s1, 0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_cmp_lg_u32 s0, s1
|
||||
; GCN-NEXT: s_cbranch_scc1 BB0_2
|
||||
; GCN-NEXT: s_cbranch_scc1 .LBB0_2
|
||||
; GCN-NEXT: ; %bb.1: ; %bb0
|
||||
; GCN-NEXT: v_readlane_b32 s8, v1, 56
|
||||
; GCN-NEXT: v_readlane_b32 s9, v1, 57
|
||||
|
@ -391,7 +391,7 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out,
|
|||
; GCN-NEXT: ;;#ASMSTART
|
||||
; GCN-NEXT: ; use s[0:7]
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
; GCN-NEXT: BB0_2: ; %ret
|
||||
; GCN-NEXT: .LBB0_2: ; %ret
|
||||
; GCN-NEXT: s_endpgm
|
||||
%wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
|
||||
%wide.sgpr1 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
|
||||
|
@ -538,7 +538,7 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(i32 addrspace(1)* %out, i32
|
|||
; GCN-NEXT: s_mov_b32 s1, 0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_cmp_lg_u32 s0, s1
|
||||
; GCN-NEXT: s_cbranch_scc1 BB1_2
|
||||
; GCN-NEXT: s_cbranch_scc1 .LBB1_2
|
||||
; GCN-NEXT: ; %bb.1: ; %bb0
|
||||
; GCN-NEXT: v_readlane_b32 s16, v1, 8
|
||||
; GCN-NEXT: v_readlane_b32 s17, v1, 9
|
||||
|
@ -632,7 +632,7 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(i32 addrspace(1)* %out, i32
|
|||
; GCN-NEXT: ;;#ASMSTART
|
||||
; GCN-NEXT: ; use s[0:15]
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
; GCN-NEXT: BB1_2: ; %ret
|
||||
; GCN-NEXT: .LBB1_2: ; %ret
|
||||
; GCN-NEXT: s_endpgm
|
||||
%wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
|
||||
%wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
|
||||
|
@ -773,7 +773,7 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32
|
|||
; GCN-NEXT: s_mov_b32 s1, 0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_cmp_lg_u32 s0, s1
|
||||
; GCN-NEXT: s_cbranch_scc1 BB2_2
|
||||
; GCN-NEXT: s_cbranch_scc1 .LBB2_2
|
||||
; GCN-NEXT: ; %bb.1: ; %bb0
|
||||
; GCN-NEXT: v_readlane_b32 s36, v31, 32
|
||||
; GCN-NEXT: v_readlane_b32 s37, v31, 33
|
||||
|
@ -864,7 +864,7 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32
|
|||
; GCN-NEXT: ;;#ASMSTART
|
||||
; GCN-NEXT: ; use s[0:1]
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
; GCN-NEXT: BB2_2: ; %ret
|
||||
; GCN-NEXT: .LBB2_2: ; %ret
|
||||
; GCN-NEXT: s_endpgm
|
||||
call void asm sideeffect "", "~{v[0:7]}" () #0
|
||||
call void asm sideeffect "", "~{v[8:15]}" () #0
|
||||
|
@ -1008,7 +1008,7 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
|
|||
; GCN-NEXT: s_mov_b32 s1, 0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_cmp_lg_u32 s0, s1
|
||||
; GCN-NEXT: s_cbranch_scc1 BB3_2
|
||||
; GCN-NEXT: s_cbranch_scc1 .LBB3_2
|
||||
; GCN-NEXT: ; %bb.1: ; %bb0
|
||||
; GCN-NEXT: v_readlane_b32 s36, v31, 32
|
||||
; GCN-NEXT: v_readlane_b32 s37, v31, 33
|
||||
|
@ -1105,7 +1105,7 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
|
|||
; GCN-NEXT: ;;#ASMSTART
|
||||
; GCN-NEXT: ; use v0
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
; GCN-NEXT: BB3_2: ; %ret
|
||||
; GCN-NEXT: .LBB3_2: ; %ret
|
||||
; GCN-NEXT: s_endpgm
|
||||
call void asm sideeffect "", "~{v[0:7]}" () #0
|
||||
call void asm sideeffect "", "~{v[8:15]}" () #0
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
; GCN-LABEL: {{^}}test_remat_sgpr:
|
||||
; GCN-NOT: v_writelane_b32
|
||||
; GCN: {{^}}[[LOOP:BB[0-9_]+]]:
|
||||
; GCN: {{^}}[[LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-COUNT-6: s_mov_b32 s{{[0-9]+}}, 0x
|
||||
; GCN-NOT: v_writelane_b32
|
||||
; GCN: s_cbranch_{{[^ ]+}} [[LOOP]]
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
|
||||
; GCN-LABEL: {{^}}uniform_br_trivial_ret_divergent_br_trivial_unreachable:
|
||||
; GCN: s_cbranch_scc1 [[RET_BB:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_scc1 [[RET_BB:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN-NEXT: ; %else
|
||||
|
||||
|
@ -57,15 +57,15 @@ ret.bb: ; preds = %else, %main_body
|
|||
; GCN: s_cbranch_vccz
|
||||
|
||||
; GCN: ; %bb.{{[0-9]+}}: ; %Flow
|
||||
; GCN: s_cbranch_execnz [[RETURN:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_execnz [[RETURN:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: ; %UnifiedReturnBlock
|
||||
; GCN-NEXT: s_or_b64 exec, exec
|
||||
; GCN-NEXT: s_waitcnt
|
||||
|
||||
; GCN: BB{{[0-9]+_[0-9]+}}: ; %else
|
||||
; GCN: .LBB{{[0-9]+_[0-9]+}}: ; %else
|
||||
; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; GCN-NEXT: s_cbranch_execz BB1_{{[0-9]+}}
|
||||
; GCN-NEXT: s_cbranch_execz .LBB1_{{[0-9]+}}
|
||||
|
||||
; GCN-NEXT: ; %unreachable.bb
|
||||
; GCN: ds_write_b32
|
||||
|
|
|
@ -440,7 +440,7 @@ entry:
|
|||
; GCN: s_load_dword [[SGPR:s[0-9]+]]
|
||||
; GCN: v_cmp_le_u32_e32 vcc, [[SGPR]], v{{[0-9]+}}
|
||||
; GCN: s_and_b64 vcc, exec, vcc
|
||||
; GCN: s_cbranch_vccnz [[EXIT:[A-Z0-9_]+]]
|
||||
; GCN: s_cbranch_vccnz [[EXIT:.L[A-Z0-9_]+]]
|
||||
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
|
||||
; GCN-NOHSA: buffer_store_dword [[ONE]]
|
||||
; GCN-HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[ONE]]
|
||||
|
@ -487,7 +487,7 @@ bb4:
|
|||
; GCN-LABEL: {{^}}phi_imm_in_sgprs
|
||||
; GCN: s_movk_i32 [[A:s[0-9]+]], 0x400
|
||||
; GCN: s_movk_i32 [[B:s[0-9]+]], 0x400
|
||||
; GCN: [[LOOP_LABEL:[0-9a-zA-Z_]+]]:
|
||||
; GCN: [[LOOP_LABEL:.L[0-9a-zA-Z_]+]]:
|
||||
; GCN: s_xor_b32 [[B]], [[B]], [[A]]
|
||||
; GCN: s_cbranch_scc{{[01]}} [[LOOP_LABEL]]
|
||||
define amdgpu_kernel void @phi_imm_in_sgprs(i32 addrspace(3)* %out, i32 %cond) {
|
||||
|
|
|
@ -175,7 +175,7 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
|||
; GCN-IR-NEXT: s_xor_b64 s[22:23], s[18:19], -1
|
||||
; GCN-IR-NEXT: s_and_b64 s[20:21], s[22:23], s[20:21]
|
||||
; GCN-IR-NEXT: s_and_b64 vcc, exec, s[20:21]
|
||||
; GCN-IR-NEXT: s_cbranch_vccz BB0_5
|
||||
; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5
|
||||
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
|
||||
; GCN-IR-NEXT: s_add_u32 s18, s12, 1
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v0, s12
|
||||
|
@ -185,7 +185,7 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
|||
; GCN-IR-NEXT: s_sub_i32 s12, 63, s12
|
||||
; GCN-IR-NEXT: s_andn2_b64 vcc, exec, vcc
|
||||
; GCN-IR-NEXT: s_lshl_b64 s[12:13], s[10:11], s12
|
||||
; GCN-IR-NEXT: s_cbranch_vccz BB0_4
|
||||
; GCN-IR-NEXT: s_cbranch_vccz .LBB0_4
|
||||
; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader
|
||||
; GCN-IR-NEXT: s_lshr_b64 s[18:19], s[10:11], s18
|
||||
; GCN-IR-NEXT: s_add_u32 s20, s6, -1
|
||||
|
@ -196,7 +196,7 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
|||
; GCN-IR-NEXT: s_addc_u32 s11, s9, s15
|
||||
; GCN-IR-NEXT: s_mov_b64 s[14:15], 0
|
||||
; GCN-IR-NEXT: s_mov_b32 s9, 0
|
||||
; GCN-IR-NEXT: BB0_3: ; %udiv-do-while
|
||||
; GCN-IR-NEXT: .LBB0_3: ; %udiv-do-while
|
||||
; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-IR-NEXT: s_lshl_b64 s[16:17], s[18:19], 1
|
||||
; GCN-IR-NEXT: s_lshr_b32 s8, s13, 31
|
||||
|
@ -218,19 +218,19 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
|||
; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[10:11], v[0:1]
|
||||
; GCN-IR-NEXT: s_mov_b64 s[14:15], s[8:9]
|
||||
; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc
|
||||
; GCN-IR-NEXT: s_cbranch_vccz BB0_3
|
||||
; GCN-IR-NEXT: BB0_4: ; %Flow6
|
||||
; GCN-IR-NEXT: s_cbranch_vccz .LBB0_3
|
||||
; GCN-IR-NEXT: .LBB0_4: ; %Flow6
|
||||
; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[12:13], 1
|
||||
; GCN-IR-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7]
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v0, s6
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v1, s7
|
||||
; GCN-IR-NEXT: s_branch BB0_6
|
||||
; GCN-IR-NEXT: BB0_5:
|
||||
; GCN-IR-NEXT: s_branch .LBB0_6
|
||||
; GCN-IR-NEXT: .LBB0_5:
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v0, s11
|
||||
; GCN-IR-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[18:19]
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v0, s10
|
||||
; GCN-IR-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[18:19]
|
||||
; GCN-IR-NEXT: BB0_6: ; %udiv-end
|
||||
; GCN-IR-NEXT: .LBB0_6: ; %udiv-end
|
||||
; GCN-IR-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1]
|
||||
; GCN-IR-NEXT: v_xor_b32_e32 v0, s0, v0
|
||||
; GCN-IR-NEXT: v_xor_b32_e32 v1, s1, v1
|
||||
|
@ -410,7 +410,7 @@ define i64 @v_test_sdiv(i64 %x, i64 %y) {
|
|||
; GCN-IR-NEXT: v_mov_b32_e32 v16, v17
|
||||
; GCN-IR-NEXT: v_cndmask_b32_e64 v9, v11, 0, s[6:7]
|
||||
; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; GCN-IR-NEXT: s_cbranch_execz BB1_6
|
||||
; GCN-IR-NEXT: s_cbranch_execz .LBB1_6
|
||||
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
|
||||
; GCN-IR-NEXT: v_add_i32_e32 v14, vcc, 1, v7
|
||||
; GCN-IR-NEXT: v_addc_u32_e32 v15, vcc, 0, v8, vcc
|
||||
|
@ -422,7 +422,7 @@ define i64 @v_test_sdiv(i64 %x, i64 %y) {
|
|||
; GCN-IR-NEXT: s_mov_b64 s[10:11], 0
|
||||
; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
|
||||
; GCN-IR-NEXT: s_cbranch_execz BB1_5
|
||||
; GCN-IR-NEXT: s_cbranch_execz .LBB1_5
|
||||
; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader
|
||||
; GCN-IR-NEXT: v_add_i32_e32 v18, vcc, -1, v2
|
||||
; GCN-IR-NEXT: v_addc_u32_e32 v19, vcc, -1, v3, vcc
|
||||
|
@ -434,7 +434,7 @@ define i64 @v_test_sdiv(i64 %x, i64 %y) {
|
|||
; GCN-IR-NEXT: v_mov_b32_e32 v16, 0
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v17, 0
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v10, 0
|
||||
; GCN-IR-NEXT: BB1_3: ; %udiv-do-while
|
||||
; GCN-IR-NEXT: .LBB1_3: ; %udiv-do-while
|
||||
; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-IR-NEXT: v_lshl_b64 v[14:15], v[14:15], 1
|
||||
; GCN-IR-NEXT: v_lshrrev_b32_e32 v0, 31, v8
|
||||
|
@ -459,15 +459,15 @@ define i64 @v_test_sdiv(i64 %x, i64 %y) {
|
|||
; GCN-IR-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v16, v9
|
||||
; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[10:11]
|
||||
; GCN-IR-NEXT: s_cbranch_execnz BB1_3
|
||||
; GCN-IR-NEXT: s_cbranch_execnz .LBB1_3
|
||||
; GCN-IR-NEXT: ; %bb.4: ; %Flow
|
||||
; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11]
|
||||
; GCN-IR-NEXT: BB1_5: ; %Flow3
|
||||
; GCN-IR-NEXT: .LBB1_5: ; %Flow3
|
||||
; GCN-IR-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[7:8], 1
|
||||
; GCN-IR-NEXT: v_or_b32_e32 v10, v10, v3
|
||||
; GCN-IR-NEXT: v_or_b32_e32 v9, v9, v2
|
||||
; GCN-IR-NEXT: BB1_6: ; %Flow4
|
||||
; GCN-IR-NEXT: .LBB1_6: ; %Flow4
|
||||
; GCN-IR-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; GCN-IR-NEXT: v_xor_b32_e32 v0, v5, v4
|
||||
; GCN-IR-NEXT: v_xor_b32_e32 v1, v1, v6
|
||||
|
@ -1033,7 +1033,7 @@ define amdgpu_kernel void @s_test_sdiv24_48(i48 addrspace(1)* %out, i48 %x, i48
|
|||
; GCN-IR-NEXT: s_xor_b64 s[22:23], s[18:19], -1
|
||||
; GCN-IR-NEXT: s_and_b64 s[20:21], s[22:23], s[20:21]
|
||||
; GCN-IR-NEXT: s_and_b64 vcc, exec, s[20:21]
|
||||
; GCN-IR-NEXT: s_cbranch_vccz BB9_5
|
||||
; GCN-IR-NEXT: s_cbranch_vccz .LBB9_5
|
||||
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
|
||||
; GCN-IR-NEXT: s_add_u32 s18, s12, 1
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v0, s12
|
||||
|
@ -1043,7 +1043,7 @@ define amdgpu_kernel void @s_test_sdiv24_48(i48 addrspace(1)* %out, i48 %x, i48
|
|||
; GCN-IR-NEXT: s_sub_i32 s12, 63, s12
|
||||
; GCN-IR-NEXT: s_andn2_b64 vcc, exec, vcc
|
||||
; GCN-IR-NEXT: s_lshl_b64 s[12:13], s[10:11], s12
|
||||
; GCN-IR-NEXT: s_cbranch_vccz BB9_4
|
||||
; GCN-IR-NEXT: s_cbranch_vccz .LBB9_4
|
||||
; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader
|
||||
; GCN-IR-NEXT: s_lshr_b64 s[18:19], s[10:11], s18
|
||||
; GCN-IR-NEXT: s_add_u32 s20, s6, -1
|
||||
|
@ -1054,7 +1054,7 @@ define amdgpu_kernel void @s_test_sdiv24_48(i48 addrspace(1)* %out, i48 %x, i48
|
|||
; GCN-IR-NEXT: s_addc_u32 s11, s9, s15
|
||||
; GCN-IR-NEXT: s_mov_b64 s[14:15], 0
|
||||
; GCN-IR-NEXT: s_mov_b32 s9, 0
|
||||
; GCN-IR-NEXT: BB9_3: ; %udiv-do-while
|
||||
; GCN-IR-NEXT: .LBB9_3: ; %udiv-do-while
|
||||
; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-IR-NEXT: s_lshl_b64 s[16:17], s[18:19], 1
|
||||
; GCN-IR-NEXT: s_lshr_b32 s8, s13, 31
|
||||
|
@ -1076,19 +1076,19 @@ define amdgpu_kernel void @s_test_sdiv24_48(i48 addrspace(1)* %out, i48 %x, i48
|
|||
; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[10:11], v[0:1]
|
||||
; GCN-IR-NEXT: s_mov_b64 s[14:15], s[8:9]
|
||||
; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc
|
||||
; GCN-IR-NEXT: s_cbranch_vccz BB9_3
|
||||
; GCN-IR-NEXT: BB9_4: ; %Flow3
|
||||
; GCN-IR-NEXT: s_cbranch_vccz .LBB9_3
|
||||
; GCN-IR-NEXT: .LBB9_4: ; %Flow3
|
||||
; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[12:13], 1
|
||||
; GCN-IR-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7]
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v0, s6
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v1, s7
|
||||
; GCN-IR-NEXT: s_branch BB9_6
|
||||
; GCN-IR-NEXT: BB9_5:
|
||||
; GCN-IR-NEXT: s_branch .LBB9_6
|
||||
; GCN-IR-NEXT: .LBB9_5:
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v0, s11
|
||||
; GCN-IR-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[18:19]
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v0, s10
|
||||
; GCN-IR-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[18:19]
|
||||
; GCN-IR-NEXT: BB9_6: ; %udiv-end
|
||||
; GCN-IR-NEXT: .LBB9_6: ; %udiv-end
|
||||
; GCN-IR-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1]
|
||||
; GCN-IR-NEXT: v_xor_b32_e32 v0, s0, v0
|
||||
; GCN-IR-NEXT: v_xor_b32_e32 v1, s1, v1
|
||||
|
@ -1247,7 +1247,7 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(i64 addrspace(1)* %out, i64 %x)
|
|||
; GCN-IR-NEXT: s_xor_b64 s[16:17], s[12:13], -1
|
||||
; GCN-IR-NEXT: s_and_b64 s[14:15], s[16:17], s[14:15]
|
||||
; GCN-IR-NEXT: s_and_b64 vcc, exec, s[14:15]
|
||||
; GCN-IR-NEXT: s_cbranch_vccz BB10_5
|
||||
; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5
|
||||
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
|
||||
; GCN-IR-NEXT: s_add_u32 s12, s10, 1
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v0, s10
|
||||
|
@ -1257,7 +1257,7 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(i64 addrspace(1)* %out, i64 %x)
|
|||
; GCN-IR-NEXT: s_sub_i32 s9, 63, s10
|
||||
; GCN-IR-NEXT: s_andn2_b64 vcc, exec, vcc
|
||||
; GCN-IR-NEXT: s_lshl_b64 s[10:11], 24, s9
|
||||
; GCN-IR-NEXT: s_cbranch_vccz BB10_4
|
||||
; GCN-IR-NEXT: s_cbranch_vccz .LBB10_4
|
||||
; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader
|
||||
; GCN-IR-NEXT: s_lshr_b64 s[14:15], 24, s12
|
||||
; GCN-IR-NEXT: s_add_u32 s16, s2, -1
|
||||
|
@ -1266,7 +1266,7 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(i64 addrspace(1)* %out, i64 %x)
|
|||
; GCN-IR-NEXT: s_subb_u32 s9, 0, 0
|
||||
; GCN-IR-NEXT: s_mov_b64 s[12:13], 0
|
||||
; GCN-IR-NEXT: s_mov_b32 s7, 0
|
||||
; GCN-IR-NEXT: BB10_3: ; %udiv-do-while
|
||||
; GCN-IR-NEXT: .LBB10_3: ; %udiv-do-while
|
||||
; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-IR-NEXT: s_lshl_b64 s[14:15], s[14:15], 1
|
||||
; GCN-IR-NEXT: s_lshr_b32 s6, s11, 31
|
||||
|
@ -1288,17 +1288,17 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(i64 addrspace(1)* %out, i64 %x)
|
|||
; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[8:9], v[0:1]
|
||||
; GCN-IR-NEXT: s_mov_b64 s[12:13], s[6:7]
|
||||
; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc
|
||||
; GCN-IR-NEXT: s_cbranch_vccz BB10_3
|
||||
; GCN-IR-NEXT: BB10_4: ; %Flow5
|
||||
; GCN-IR-NEXT: s_cbranch_vccz .LBB10_3
|
||||
; GCN-IR-NEXT: .LBB10_4: ; %Flow5
|
||||
; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[10:11], 1
|
||||
; GCN-IR-NEXT: s_or_b64 s[2:3], s[6:7], s[2:3]
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GCN-IR-NEXT: s_branch BB10_6
|
||||
; GCN-IR-NEXT: BB10_5:
|
||||
; GCN-IR-NEXT: s_branch .LBB10_6
|
||||
; GCN-IR-NEXT: .LBB10_5:
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 24, 0, s[12:13]
|
||||
; GCN-IR-NEXT: BB10_6: ; %udiv-end
|
||||
; GCN-IR-NEXT: .LBB10_6: ; %udiv-end
|
||||
; GCN-IR-NEXT: v_xor_b32_e32 v0, s4, v0
|
||||
; GCN-IR-NEXT: v_xor_b32_e32 v1, s5, v1
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v2, s5
|
||||
|
@ -1448,7 +1448,7 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) {
|
|||
; GCN-IR-NEXT: v_mov_b32_e32 v7, v9
|
||||
; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], vcc
|
||||
; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; GCN-IR-NEXT: s_cbranch_execz BB11_6
|
||||
; GCN-IR-NEXT: s_cbranch_execz .LBB11_6
|
||||
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
|
||||
; GCN-IR-NEXT: v_add_i32_e32 v10, vcc, 1, v4
|
||||
; GCN-IR-NEXT: v_addc_u32_e32 v11, vcc, 0, v5, vcc
|
||||
|
@ -1460,7 +1460,7 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) {
|
|||
; GCN-IR-NEXT: s_mov_b64 s[10:11], 0
|
||||
; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
|
||||
; GCN-IR-NEXT: s_cbranch_execz BB11_5
|
||||
; GCN-IR-NEXT: s_cbranch_execz .LBB11_5
|
||||
; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader
|
||||
; GCN-IR-NEXT: v_add_i32_e32 v14, vcc, -1, v0
|
||||
; GCN-IR-NEXT: v_addc_u32_e32 v15, vcc, -1, v1, vcc
|
||||
|
@ -1470,7 +1470,7 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) {
|
|||
; GCN-IR-NEXT: v_subb_u32_e32 v9, vcc, 0, v9, vcc
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v13, 0
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v7, 0
|
||||
; GCN-IR-NEXT: BB11_3: ; %udiv-do-while
|
||||
; GCN-IR-NEXT: .LBB11_3: ; %udiv-do-while
|
||||
; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-IR-NEXT: v_lshl_b64 v[10:11], v[10:11], 1
|
||||
; GCN-IR-NEXT: v_lshrrev_b32_e32 v6, 31, v5
|
||||
|
@ -1495,15 +1495,15 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) {
|
|||
; GCN-IR-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v12, v6
|
||||
; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[10:11]
|
||||
; GCN-IR-NEXT: s_cbranch_execnz BB11_3
|
||||
; GCN-IR-NEXT: s_cbranch_execnz .LBB11_3
|
||||
; GCN-IR-NEXT: ; %bb.4: ; %Flow
|
||||
; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11]
|
||||
; GCN-IR-NEXT: BB11_5: ; %Flow3
|
||||
; GCN-IR-NEXT: .LBB11_5: ; %Flow3
|
||||
; GCN-IR-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GCN-IR-NEXT: v_lshl_b64 v[0:1], v[4:5], 1
|
||||
; GCN-IR-NEXT: v_or_b32_e32 v7, v7, v1
|
||||
; GCN-IR-NEXT: v_or_b32_e32 v6, v6, v0
|
||||
; GCN-IR-NEXT: BB11_6: ; %Flow4
|
||||
; GCN-IR-NEXT: .LBB11_6: ; %Flow4
|
||||
; GCN-IR-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; GCN-IR-NEXT: v_xor_b32_e32 v0, v6, v2
|
||||
; GCN-IR-NEXT: v_xor_b32_e32 v1, v7, v3
|
||||
|
@ -1652,7 +1652,7 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) {
|
|||
; GCN-IR-NEXT: v_mov_b32_e32 v7, v9
|
||||
; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], vcc
|
||||
; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; GCN-IR-NEXT: s_cbranch_execz BB12_6
|
||||
; GCN-IR-NEXT: s_cbranch_execz .LBB12_6
|
||||
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
|
||||
; GCN-IR-NEXT: v_add_i32_e32 v10, vcc, 1, v4
|
||||
; GCN-IR-NEXT: v_addc_u32_e32 v11, vcc, 0, v5, vcc
|
||||
|
@ -1664,7 +1664,7 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) {
|
|||
; GCN-IR-NEXT: s_mov_b64 s[10:11], 0
|
||||
; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
|
||||
; GCN-IR-NEXT: s_cbranch_execz BB12_5
|
||||
; GCN-IR-NEXT: s_cbranch_execz .LBB12_5
|
||||
; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader
|
||||
; GCN-IR-NEXT: v_add_i32_e32 v14, vcc, -1, v0
|
||||
; GCN-IR-NEXT: s_mov_b64 s[4:5], 0x8000
|
||||
|
@ -1675,7 +1675,7 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) {
|
|||
; GCN-IR-NEXT: v_subb_u32_e32 v9, vcc, 0, v9, vcc
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v13, 0
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v7, 0
|
||||
; GCN-IR-NEXT: BB12_3: ; %udiv-do-while
|
||||
; GCN-IR-NEXT: .LBB12_3: ; %udiv-do-while
|
||||
; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-IR-NEXT: v_lshl_b64 v[10:11], v[10:11], 1
|
||||
; GCN-IR-NEXT: v_lshrrev_b32_e32 v6, 31, v5
|
||||
|
@ -1700,15 +1700,15 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) {
|
|||
; GCN-IR-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v12, v6
|
||||
; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[10:11]
|
||||
; GCN-IR-NEXT: s_cbranch_execnz BB12_3
|
||||
; GCN-IR-NEXT: s_cbranch_execnz .LBB12_3
|
||||
; GCN-IR-NEXT: ; %bb.4: ; %Flow
|
||||
; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11]
|
||||
; GCN-IR-NEXT: BB12_5: ; %Flow3
|
||||
; GCN-IR-NEXT: .LBB12_5: ; %Flow3
|
||||
; GCN-IR-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GCN-IR-NEXT: v_lshl_b64 v[0:1], v[4:5], 1
|
||||
; GCN-IR-NEXT: v_or_b32_e32 v7, v7, v1
|
||||
; GCN-IR-NEXT: v_or_b32_e32 v6, v6, v0
|
||||
; GCN-IR-NEXT: BB12_6: ; %Flow4
|
||||
; GCN-IR-NEXT: .LBB12_6: ; %Flow4
|
||||
; GCN-IR-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; GCN-IR-NEXT: v_xor_b32_e32 v0, v6, v2
|
||||
; GCN-IR-NEXT: v_xor_b32_e32 v1, v7, v3
|
||||
|
@ -1754,7 +1754,7 @@ define i64 @v_test_sdiv_pow2_k_den_i64(i64 %x) {
|
|||
; GCN-IR-NEXT: v_cndmask_b32_e64 v5, v7, 0, s[4:5]
|
||||
; GCN-IR-NEXT: s_and_b64 s[4:5], s[6:7], vcc
|
||||
; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; GCN-IR-NEXT: s_cbranch_execz BB13_6
|
||||
; GCN-IR-NEXT: s_cbranch_execz .LBB13_6
|
||||
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
|
||||
; GCN-IR-NEXT: v_add_i32_e32 v9, vcc, 1, v3
|
||||
; GCN-IR-NEXT: v_addc_u32_e32 v10, vcc, 0, v4, vcc
|
||||
|
@ -1766,7 +1766,7 @@ define i64 @v_test_sdiv_pow2_k_den_i64(i64 %x) {
|
|||
; GCN-IR-NEXT: s_mov_b64 s[10:11], 0
|
||||
; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
|
||||
; GCN-IR-NEXT: s_cbranch_execz BB13_5
|
||||
; GCN-IR-NEXT: s_cbranch_execz .LBB13_5
|
||||
; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader
|
||||
; GCN-IR-NEXT: v_lshr_b64 v[9:10], v[7:8], v9
|
||||
; GCN-IR-NEXT: v_add_i32_e32 v7, vcc, 0xffffffcf, v0
|
||||
|
@ -1775,7 +1775,7 @@ define i64 @v_test_sdiv_pow2_k_den_i64(i64 %x) {
|
|||
; GCN-IR-NEXT: v_mov_b32_e32 v12, 0
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v6, 0
|
||||
; GCN-IR-NEXT: s_movk_i32 s12, 0x7fff
|
||||
; GCN-IR-NEXT: BB13_3: ; %udiv-do-while
|
||||
; GCN-IR-NEXT: .LBB13_3: ; %udiv-do-while
|
||||
; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GCN-IR-NEXT: v_lshl_b64 v[9:10], v[9:10], 1
|
||||
; GCN-IR-NEXT: v_lshrrev_b32_e32 v0, 31, v4
|
||||
|
@ -1799,15 +1799,15 @@ define i64 @v_test_sdiv_pow2_k_den_i64(i64 %x) {
|
|||
; GCN-IR-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
|
||||
; GCN-IR-NEXT: v_mov_b32_e32 v11, v5
|
||||
; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[10:11]
|
||||
; GCN-IR-NEXT: s_cbranch_execnz BB13_3
|
||||
; GCN-IR-NEXT: s_cbranch_execnz .LBB13_3
|
||||
; GCN-IR-NEXT: ; %bb.4: ; %Flow
|
||||
; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11]
|
||||
; GCN-IR-NEXT: BB13_5: ; %Flow3
|
||||
; GCN-IR-NEXT: .LBB13_5: ; %Flow3
|
||||
; GCN-IR-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GCN-IR-NEXT: v_lshl_b64 v[3:4], v[3:4], 1
|
||||
; GCN-IR-NEXT: v_or_b32_e32 v6, v6, v4
|
||||
; GCN-IR-NEXT: v_or_b32_e32 v5, v5, v3
|
||||
; GCN-IR-NEXT: BB13_6: ; %Flow4
|
||||
; GCN-IR-NEXT: .LBB13_6: ; %Flow4
|
||||
; GCN-IR-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; GCN-IR-NEXT: v_xor_b32_e32 v0, v5, v2
|
||||
; GCN-IR-NEXT: v_xor_b32_e32 v3, v6, v1
|
||||
|
|
|
@ -16,16 +16,16 @@ define amdgpu_kernel void @sgpr_if_else_salu_br(i32 addrspace(1)* %out, i32 %a,
|
|||
; SI-NEXT: s_load_dword s0, s[0:1], 0xf
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_cmp_lg_u32 s8, 0
|
||||
; SI-NEXT: s_cbranch_scc0 BB0_2
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB0_2
|
||||
; SI-NEXT: ; %bb.1: ; %else
|
||||
; SI-NEXT: s_add_i32 s2, s11, s0
|
||||
; SI-NEXT: s_cbranch_execz BB0_3
|
||||
; SI-NEXT: s_branch BB0_4
|
||||
; SI-NEXT: BB0_2:
|
||||
; SI-NEXT: s_cbranch_execz .LBB0_3
|
||||
; SI-NEXT: s_branch .LBB0_4
|
||||
; SI-NEXT: .LBB0_2:
|
||||
; SI-NEXT: ; implicit-def: $sgpr2
|
||||
; SI-NEXT: BB0_3: ; %if
|
||||
; SI-NEXT: .LBB0_3: ; %if
|
||||
; SI-NEXT: s_sub_i32 s2, s9, s10
|
||||
; SI-NEXT: BB0_4: ; %endif
|
||||
; SI-NEXT: .LBB0_4: ; %endif
|
||||
; SI-NEXT: s_add_i32 s0, s2, s8
|
||||
; SI-NEXT: s_mov_b32 s7, 0xf000
|
||||
; SI-NEXT: s_mov_b32 s6, -1
|
||||
|
@ -59,22 +59,22 @@ define amdgpu_kernel void @sgpr_if_else_salu_br_opt(i32 addrspace(1)* %out, [8 x
|
|||
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_cmp_lg_u32 s6, 0
|
||||
; SI-NEXT: s_cbranch_scc0 BB1_2
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB1_2
|
||||
; SI-NEXT: ; %bb.1: ; %else
|
||||
; SI-NEXT: s_load_dword s2, s[0:1], 0x2e
|
||||
; SI-NEXT: s_load_dword s3, s[0:1], 0x37
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_add_i32 s7, s2, s3
|
||||
; SI-NEXT: s_cbranch_execz BB1_3
|
||||
; SI-NEXT: s_branch BB1_4
|
||||
; SI-NEXT: BB1_2:
|
||||
; SI-NEXT: s_cbranch_execz .LBB1_3
|
||||
; SI-NEXT: s_branch .LBB1_4
|
||||
; SI-NEXT: .LBB1_2:
|
||||
; SI-NEXT: ; implicit-def: $sgpr7
|
||||
; SI-NEXT: BB1_3: ; %if
|
||||
; SI-NEXT: .LBB1_3: ; %if
|
||||
; SI-NEXT: s_load_dword s2, s[0:1], 0x1c
|
||||
; SI-NEXT: s_load_dword s0, s[0:1], 0x25
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_add_i32 s7, s2, s0
|
||||
; SI-NEXT: BB1_4: ; %endif
|
||||
; SI-NEXT: .LBB1_4: ; %endif
|
||||
; SI-NEXT: s_add_i32 s0, s7, s6
|
||||
; SI-NEXT: s_mov_b32 s7, 0xf000
|
||||
; SI-NEXT: s_mov_b32 s6, -1
|
||||
|
@ -113,11 +113,11 @@ define amdgpu_kernel void @sgpr_if_else_valu_br(i32 addrspace(1)* %out, float %a
|
|||
; SI-NEXT: v_cmp_lg_f32_e32 vcc, 0, v0
|
||||
; SI-NEXT: s_and_saveexec_b64 s[6:7], vcc
|
||||
; SI-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; SI-NEXT: s_cbranch_execz BB2_2
|
||||
; SI-NEXT: s_cbranch_execz .LBB2_2
|
||||
; SI-NEXT: ; %bb.1: ; %else
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_add_i32 s8, s2, s3
|
||||
; SI-NEXT: BB2_2: ; %Flow
|
||||
; SI-NEXT: .LBB2_2: ; %Flow
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_or_saveexec_b64 s[2:3], s[6:7]
|
||||
; SI-NEXT: v_mov_b32_e32 v0, s8
|
||||
|
@ -161,7 +161,7 @@ define amdgpu_kernel void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out,
|
|||
; SI-NEXT: ; implicit-def: $sgpr8_sgpr9
|
||||
; SI-NEXT: s_and_saveexec_b64 s[10:11], vcc
|
||||
; SI-NEXT: s_xor_b64 s[10:11], exec, s[10:11]
|
||||
; SI-NEXT: s_cbranch_execz BB3_2
|
||||
; SI-NEXT: s_cbranch_execz .LBB3_2
|
||||
; SI-NEXT: ; %bb.1: ; %else
|
||||
; SI-NEXT: s_mov_b32 s3, 0xf000
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
|
@ -172,11 +172,11 @@ define amdgpu_kernel void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out,
|
|||
; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v0
|
||||
; SI-NEXT: s_and_b64 s[8:9], vcc, exec
|
||||
; SI-NEXT: ; implicit-def: $vgpr0
|
||||
; SI-NEXT: BB3_2: ; %Flow
|
||||
; SI-NEXT: .LBB3_2: ; %Flow
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_or_saveexec_b64 s[0:1], s[10:11]
|
||||
; SI-NEXT: s_xor_b64 exec, exec, s[0:1]
|
||||
; SI-NEXT: s_cbranch_execz BB3_4
|
||||
; SI-NEXT: s_cbranch_execz .LBB3_4
|
||||
; SI-NEXT: ; %bb.3: ; %if
|
||||
; SI-NEXT: s_mov_b32 s15, 0xf000
|
||||
; SI-NEXT: s_mov_b32 s14, 0
|
||||
|
@ -189,7 +189,7 @@ define amdgpu_kernel void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out,
|
|||
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; SI-NEXT: s_and_b64 s[6:7], vcc, exec
|
||||
; SI-NEXT: s_or_b64 s[8:9], s[2:3], s[6:7]
|
||||
; SI-NEXT: BB3_4: ; %endif
|
||||
; SI-NEXT: .LBB3_4: ; %endif
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; SI-NEXT: s_mov_b32 s7, 0xf000
|
||||
; SI-NEXT: s_mov_b32 s6, -1
|
||||
|
|
|
@ -315,7 +315,7 @@ ENDIF69: ; preds = %LOOP68
|
|||
; CHECK-LABEL:{{^}}sample_rsrc
|
||||
|
||||
; CHECK: s_cmp_eq_u32
|
||||
; CHECK: s_cbranch_scc0 [[END:BB[0-9]+_[0-9]+]]
|
||||
; CHECK: s_cbranch_scc0 [[END:.LBB[0-9]+_[0-9]+]]
|
||||
|
||||
; CHECK: v_add_{{[iu]}}32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}}
|
||||
|
||||
|
|
|
@ -111,7 +111,7 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(i32 addrspace(1)* %o
|
|||
; GCN-NEXT: s_mov_b32 s5, 0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_cmp_lg_u32 s4, s5
|
||||
; GCN-NEXT: s_cbranch_scc1 BB0_2
|
||||
; GCN-NEXT: s_cbranch_scc1 .LBB0_2
|
||||
; GCN-NEXT: ; %bb.1: ; %bb0
|
||||
; GCN-NEXT: v_readlane_b32 s4, v23, 0
|
||||
; GCN-NEXT: v_readlane_b32 s5, v23, 1
|
||||
|
@ -202,7 +202,7 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(i32 addrspace(1)* %o
|
|||
; GCN-NEXT: ;;#ASMSTART
|
||||
; GCN-NEXT: ; use s[4:5]
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
; GCN-NEXT: BB0_2: ; %ret
|
||||
; GCN-NEXT: .LBB0_2: ; %ret
|
||||
; GCN-NEXT: s_endpgm
|
||||
call void asm sideeffect "", "~{v[0:7]}" () #0
|
||||
call void asm sideeffect "", "~{v[8:15]}" () #0
|
||||
|
|
|
@ -19,12 +19,12 @@ define amdgpu_ps float @uniform_kill(float %a, i32 %b, float %c) {
|
|||
; SI-NEXT: s_wqm_b64 s[4:5], s[2:3]
|
||||
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], exec
|
||||
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5]
|
||||
; SI-NEXT: s_cbranch_scc0 BB0_6
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB0_6
|
||||
; SI-NEXT: ; %bb.3: ; %endif1
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[0:1]
|
||||
; SI-NEXT: v_mov_b32_e32 v0, 0
|
||||
; SI-NEXT: s_and_saveexec_b64 s[0:1], s[2:3]
|
||||
; SI-NEXT: s_cbranch_execz BB0_5
|
||||
; SI-NEXT: s_cbranch_execz .LBB0_5
|
||||
; SI-NEXT: ; %bb.4: ; %if2
|
||||
; SI-NEXT: s_mov_b32 s3, 0
|
||||
; SI-NEXT: v_add_f32_e32 v0, 1.0, v2
|
||||
|
@ -34,14 +34,14 @@ define amdgpu_ps float @uniform_kill(float %a, i32 %b, float %c) {
|
|||
; SI-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 offset:4 glc
|
||||
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
||||
; SI-NEXT: v_cvt_f32_i32_e32 v0, v0
|
||||
; SI-NEXT: BB0_5: ; %endif2
|
||||
; SI-NEXT: .LBB0_5: ; %endif2
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; SI-NEXT: s_branch BB0_7
|
||||
; SI-NEXT: BB0_6:
|
||||
; SI-NEXT: s_branch .LBB0_7
|
||||
; SI-NEXT: .LBB0_6:
|
||||
; SI-NEXT: s_mov_b64 exec, 0
|
||||
; SI-NEXT: exp null off, off, off, off done vm
|
||||
; SI-NEXT: s_endpgm
|
||||
; SI-NEXT: BB0_7:
|
||||
; SI-NEXT: .LBB0_7:
|
||||
;
|
||||
; FLAT-LABEL: uniform_kill:
|
||||
; FLAT: ; %bb.0: ; %entry
|
||||
|
@ -59,12 +59,12 @@ define amdgpu_ps float @uniform_kill(float %a, i32 %b, float %c) {
|
|||
; FLAT-NEXT: s_wqm_b64 s[4:5], s[2:3]
|
||||
; FLAT-NEXT: s_xor_b64 s[4:5], s[4:5], exec
|
||||
; FLAT-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5]
|
||||
; FLAT-NEXT: s_cbranch_scc0 BB0_6
|
||||
; FLAT-NEXT: s_cbranch_scc0 .LBB0_6
|
||||
; FLAT-NEXT: ; %bb.3: ; %endif1
|
||||
; FLAT-NEXT: s_and_b64 exec, exec, s[0:1]
|
||||
; FLAT-NEXT: v_mov_b32_e32 v0, 0
|
||||
; FLAT-NEXT: s_and_saveexec_b64 s[0:1], s[2:3]
|
||||
; FLAT-NEXT: s_cbranch_execz BB0_5
|
||||
; FLAT-NEXT: s_cbranch_execz .LBB0_5
|
||||
; FLAT-NEXT: ; %bb.4: ; %if2
|
||||
; FLAT-NEXT: s_mov_b32 s3, 0
|
||||
; FLAT-NEXT: v_add_f32_e32 v0, 1.0, v2
|
||||
|
@ -74,14 +74,14 @@ define amdgpu_ps float @uniform_kill(float %a, i32 %b, float %c) {
|
|||
; FLAT-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 offset:4 glc
|
||||
; FLAT-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLAT-NEXT: v_cvt_f32_i32_e32 v0, v0
|
||||
; FLAT-NEXT: BB0_5: ; %endif2
|
||||
; FLAT-NEXT: .LBB0_5: ; %endif2
|
||||
; FLAT-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; FLAT-NEXT: s_branch BB0_7
|
||||
; FLAT-NEXT: BB0_6:
|
||||
; FLAT-NEXT: s_branch .LBB0_7
|
||||
; FLAT-NEXT: .LBB0_6:
|
||||
; FLAT-NEXT: s_mov_b64 exec, 0
|
||||
; FLAT-NEXT: exp null off, off, off, off done vm
|
||||
; FLAT-NEXT: s_endpgm
|
||||
; FLAT-NEXT: BB0_7:
|
||||
; FLAT-NEXT: .LBB0_7:
|
||||
entry:
|
||||
%.1 = fptosi float %a to i32
|
||||
%.2 = or i32 %b, %.1
|
||||
|
|
|
@ -13,12 +13,12 @@ define amdgpu_kernel void @break_inserted_outside_of_loop(i32 addrspace(1)* %out
|
|||
; SI-NEXT: v_and_b32_e32 v0, 1, v0
|
||||
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
|
||||
; SI-NEXT: s_mov_b64 s[0:1], 0
|
||||
; SI-NEXT: BB0_1: ; %ENDIF
|
||||
; SI-NEXT: .LBB0_1: ; %ENDIF
|
||||
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; SI-NEXT: s_and_b64 s[2:3], exec, vcc
|
||||
; SI-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
|
||||
; SI-NEXT: s_andn2_b64 exec, exec, s[0:1]
|
||||
; SI-NEXT: s_cbranch_execnz BB0_1
|
||||
; SI-NEXT: s_cbranch_execnz .LBB0_1
|
||||
; SI-NEXT: ; %bb.2: ; %ENDLOOP
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; SI-NEXT: s_mov_b32 s7, 0xf000
|
||||
|
@ -37,12 +37,12 @@ define amdgpu_kernel void @break_inserted_outside_of_loop(i32 addrspace(1)* %out
|
|||
; FLAT-NEXT: v_and_b32_e32 v0, 1, v0
|
||||
; FLAT-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
|
||||
; FLAT-NEXT: s_mov_b64 s[0:1], 0
|
||||
; FLAT-NEXT: BB0_1: ; %ENDIF
|
||||
; FLAT-NEXT: .LBB0_1: ; %ENDIF
|
||||
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; FLAT-NEXT: s_and_b64 s[2:3], exec, vcc
|
||||
; FLAT-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
|
||||
; FLAT-NEXT: s_andn2_b64 exec, exec, s[0:1]
|
||||
; FLAT-NEXT: s_cbranch_execnz BB0_1
|
||||
; FLAT-NEXT: s_cbranch_execnz .LBB0_1
|
||||
; FLAT-NEXT: ; %bb.2: ; %ENDLOOP
|
||||
; FLAT-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; FLAT-NEXT: s_mov_b32 s7, 0xf000
|
||||
|
@ -72,21 +72,21 @@ define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) {
|
|||
; SI-NEXT: s_mov_b64 s[2:3], 0
|
||||
; SI-NEXT: s_mov_b64 s[4:5], 0
|
||||
; SI-NEXT: s_and_saveexec_b64 s[6:7], vcc
|
||||
; SI-NEXT: s_cbranch_execz BB1_2
|
||||
; SI-NEXT: s_cbranch_execz .LBB1_2
|
||||
; SI-NEXT: ; %bb.1: ; %else
|
||||
; SI-NEXT: s_load_dword s0, s[0:1], 0x9
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_cmp_eq_u32 s0, 0
|
||||
; SI-NEXT: s_cselect_b64 s[0:1], -1, 0
|
||||
; SI-NEXT: s_and_b64 s[4:5], s[0:1], exec
|
||||
; SI-NEXT: BB1_2: ; %endif
|
||||
; SI-NEXT: .LBB1_2: ; %endif
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; SI-NEXT: BB1_3: ; %loop
|
||||
; SI-NEXT: .LBB1_3: ; %loop
|
||||
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; SI-NEXT: s_and_b64 s[0:1], exec, s[4:5]
|
||||
; SI-NEXT: s_or_b64 s[2:3], s[0:1], s[2:3]
|
||||
; SI-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: s_cbranch_execnz BB1_3
|
||||
; SI-NEXT: s_cbranch_execnz .LBB1_3
|
||||
; SI-NEXT: ; %bb.4: ; %exit
|
||||
; SI-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -97,21 +97,21 @@ define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) {
|
|||
; FLAT-NEXT: s_mov_b64 s[2:3], 0
|
||||
; FLAT-NEXT: s_mov_b64 s[4:5], 0
|
||||
; FLAT-NEXT: s_and_saveexec_b64 s[6:7], vcc
|
||||
; FLAT-NEXT: s_cbranch_execz BB1_2
|
||||
; FLAT-NEXT: s_cbranch_execz .LBB1_2
|
||||
; FLAT-NEXT: ; %bb.1: ; %else
|
||||
; FLAT-NEXT: s_load_dword s0, s[0:1], 0x24
|
||||
; FLAT-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; FLAT-NEXT: s_cmp_eq_u32 s0, 0
|
||||
; FLAT-NEXT: s_cselect_b64 s[0:1], -1, 0
|
||||
; FLAT-NEXT: s_and_b64 s[4:5], s[0:1], exec
|
||||
; FLAT-NEXT: BB1_2: ; %endif
|
||||
; FLAT-NEXT: .LBB1_2: ; %endif
|
||||
; FLAT-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; FLAT-NEXT: BB1_3: ; %loop
|
||||
; FLAT-NEXT: .LBB1_3: ; %loop
|
||||
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; FLAT-NEXT: s_and_b64 s[0:1], exec, s[4:5]
|
||||
; FLAT-NEXT: s_or_b64 s[2:3], s[0:1], s[2:3]
|
||||
; FLAT-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; FLAT-NEXT: s_cbranch_execnz BB1_3
|
||||
; FLAT-NEXT: s_cbranch_execnz .LBB1_3
|
||||
; FLAT-NEXT: ; %bb.4: ; %exit
|
||||
; FLAT-NEXT: s_endpgm
|
||||
entry:
|
||||
|
@ -184,58 +184,58 @@ define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32
|
|||
; SI-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s8
|
||||
; SI-NEXT: s_and_b64 s[4:5], exec, s[4:5]
|
||||
; SI-NEXT: v_mov_b32_e32 v0, 3
|
||||
; SI-NEXT: s_branch BB3_4
|
||||
; SI-NEXT: BB3_1: ; %Flow6
|
||||
; SI-NEXT: s_branch .LBB3_4
|
||||
; SI-NEXT: .LBB3_1: ; %Flow6
|
||||
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
|
||||
; SI-NEXT: s_mov_b64 s[8:9], 0
|
||||
; SI-NEXT: BB3_2: ; %Flow5
|
||||
; SI-NEXT: .LBB3_2: ; %Flow5
|
||||
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
|
||||
; SI-NEXT: s_mov_b64 s[12:13], 0
|
||||
; SI-NEXT: BB3_3: ; %Flow
|
||||
; SI-NEXT: .LBB3_3: ; %Flow
|
||||
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
|
||||
; SI-NEXT: s_and_b64 vcc, exec, s[10:11]
|
||||
; SI-NEXT: s_cbranch_vccnz BB3_8
|
||||
; SI-NEXT: BB3_4: ; %while.cond
|
||||
; SI-NEXT: s_cbranch_vccnz .LBB3_8
|
||||
; SI-NEXT: .LBB3_4: ; %while.cond
|
||||
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; SI-NEXT: s_mov_b64 s[12:13], -1
|
||||
; SI-NEXT: s_mov_b64 s[8:9], -1
|
||||
; SI-NEXT: s_mov_b64 s[10:11], -1
|
||||
; SI-NEXT: s_mov_b64 vcc, s[0:1]
|
||||
; SI-NEXT: s_cbranch_vccz BB3_3
|
||||
; SI-NEXT: s_cbranch_vccz .LBB3_3
|
||||
; SI-NEXT: ; %bb.5: ; %convex.exit
|
||||
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
|
||||
; SI-NEXT: s_mov_b64 s[8:9], -1
|
||||
; SI-NEXT: s_mov_b64 s[10:11], -1
|
||||
; SI-NEXT: s_mov_b64 vcc, s[2:3]
|
||||
; SI-NEXT: s_cbranch_vccz BB3_2
|
||||
; SI-NEXT: s_cbranch_vccz .LBB3_2
|
||||
; SI-NEXT: ; %bb.6: ; %if.end
|
||||
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
|
||||
; SI-NEXT: s_mov_b64 s[10:11], -1
|
||||
; SI-NEXT: s_mov_b64 vcc, s[4:5]
|
||||
; SI-NEXT: s_cbranch_vccz BB3_1
|
||||
; SI-NEXT: s_cbranch_vccz .LBB3_1
|
||||
; SI-NEXT: ; %bb.7: ; %if.else
|
||||
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
|
||||
; SI-NEXT: s_mov_b64 s[10:11], 0
|
||||
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: s_branch BB3_1
|
||||
; SI-NEXT: BB3_8: ; %loop.exit.guard4
|
||||
; SI-NEXT: s_branch .LBB3_1
|
||||
; SI-NEXT: .LBB3_8: ; %loop.exit.guard4
|
||||
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
|
||||
; SI-NEXT: s_and_b64 vcc, exec, s[8:9]
|
||||
; SI-NEXT: s_cbranch_vccz BB3_4
|
||||
; SI-NEXT: s_cbranch_vccz .LBB3_4
|
||||
; SI-NEXT: ; %bb.9: ; %loop.exit.guard
|
||||
; SI-NEXT: s_and_b64 vcc, exec, s[12:13]
|
||||
; SI-NEXT: s_cbranch_vccz BB3_13
|
||||
; SI-NEXT: s_cbranch_vccz .LBB3_13
|
||||
; SI-NEXT: ; %bb.10: ; %for.cond.preheader
|
||||
; SI-NEXT: s_cmpk_lt_i32 s14, 0x3e8
|
||||
; SI-NEXT: s_cbranch_scc0 BB3_13
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB3_13
|
||||
; SI-NEXT: ; %bb.11: ; %for.body
|
||||
; SI-NEXT: s_and_b64 vcc, exec, 0
|
||||
; SI-NEXT: BB3_12: ; %self.loop
|
||||
; SI-NEXT: .LBB3_12: ; %self.loop
|
||||
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; SI-NEXT: s_mov_b64 vcc, vcc
|
||||
; SI-NEXT: s_cbranch_vccz BB3_12
|
||||
; SI-NEXT: BB3_13: ; %DummyReturnBlock
|
||||
; SI-NEXT: s_cbranch_vccz .LBB3_12
|
||||
; SI-NEXT: .LBB3_13: ; %DummyReturnBlock
|
||||
; SI-NEXT: s_endpgm
|
||||
;
|
||||
; FLAT-LABEL: loop_land_info_assert:
|
||||
|
@ -260,58 +260,58 @@ define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32
|
|||
; FLAT-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s8
|
||||
; FLAT-NEXT: s_and_b64 s[4:5], exec, s[4:5]
|
||||
; FLAT-NEXT: v_mov_b32_e32 v0, 3
|
||||
; FLAT-NEXT: s_branch BB3_4
|
||||
; FLAT-NEXT: BB3_1: ; %Flow6
|
||||
; FLAT-NEXT: s_branch .LBB3_4
|
||||
; FLAT-NEXT: .LBB3_1: ; %Flow6
|
||||
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
|
||||
; FLAT-NEXT: s_mov_b64 s[8:9], 0
|
||||
; FLAT-NEXT: BB3_2: ; %Flow5
|
||||
; FLAT-NEXT: .LBB3_2: ; %Flow5
|
||||
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
|
||||
; FLAT-NEXT: s_mov_b64 s[12:13], 0
|
||||
; FLAT-NEXT: BB3_3: ; %Flow
|
||||
; FLAT-NEXT: .LBB3_3: ; %Flow
|
||||
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
|
||||
; FLAT-NEXT: s_and_b64 vcc, exec, s[10:11]
|
||||
; FLAT-NEXT: s_cbranch_vccnz BB3_8
|
||||
; FLAT-NEXT: BB3_4: ; %while.cond
|
||||
; FLAT-NEXT: s_cbranch_vccnz .LBB3_8
|
||||
; FLAT-NEXT: .LBB3_4: ; %while.cond
|
||||
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; FLAT-NEXT: s_mov_b64 s[12:13], -1
|
||||
; FLAT-NEXT: s_mov_b64 s[8:9], -1
|
||||
; FLAT-NEXT: s_mov_b64 s[10:11], -1
|
||||
; FLAT-NEXT: s_mov_b64 vcc, s[0:1]
|
||||
; FLAT-NEXT: s_cbranch_vccz BB3_3
|
||||
; FLAT-NEXT: s_cbranch_vccz .LBB3_3
|
||||
; FLAT-NEXT: ; %bb.5: ; %convex.exit
|
||||
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
|
||||
; FLAT-NEXT: s_mov_b64 s[8:9], -1
|
||||
; FLAT-NEXT: s_mov_b64 s[10:11], -1
|
||||
; FLAT-NEXT: s_mov_b64 vcc, s[2:3]
|
||||
; FLAT-NEXT: s_cbranch_vccz BB3_2
|
||||
; FLAT-NEXT: s_cbranch_vccz .LBB3_2
|
||||
; FLAT-NEXT: ; %bb.6: ; %if.end
|
||||
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
|
||||
; FLAT-NEXT: s_mov_b64 s[10:11], -1
|
||||
; FLAT-NEXT: s_mov_b64 vcc, s[4:5]
|
||||
; FLAT-NEXT: s_cbranch_vccz BB3_1
|
||||
; FLAT-NEXT: s_cbranch_vccz .LBB3_1
|
||||
; FLAT-NEXT: ; %bb.7: ; %if.else
|
||||
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
|
||||
; FLAT-NEXT: s_mov_b64 s[10:11], 0
|
||||
; FLAT-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; FLAT-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLAT-NEXT: s_branch BB3_1
|
||||
; FLAT-NEXT: BB3_8: ; %loop.exit.guard4
|
||||
; FLAT-NEXT: s_branch .LBB3_1
|
||||
; FLAT-NEXT: .LBB3_8: ; %loop.exit.guard4
|
||||
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
|
||||
; FLAT-NEXT: s_and_b64 vcc, exec, s[8:9]
|
||||
; FLAT-NEXT: s_cbranch_vccz BB3_4
|
||||
; FLAT-NEXT: s_cbranch_vccz .LBB3_4
|
||||
; FLAT-NEXT: ; %bb.9: ; %loop.exit.guard
|
||||
; FLAT-NEXT: s_and_b64 vcc, exec, s[12:13]
|
||||
; FLAT-NEXT: s_cbranch_vccz BB3_13
|
||||
; FLAT-NEXT: s_cbranch_vccz .LBB3_13
|
||||
; FLAT-NEXT: ; %bb.10: ; %for.cond.preheader
|
||||
; FLAT-NEXT: s_cmpk_lt_i32 s14, 0x3e8
|
||||
; FLAT-NEXT: s_cbranch_scc0 BB3_13
|
||||
; FLAT-NEXT: s_cbranch_scc0 .LBB3_13
|
||||
; FLAT-NEXT: ; %bb.11: ; %for.body
|
||||
; FLAT-NEXT: s_and_b64 vcc, exec, 0
|
||||
; FLAT-NEXT: BB3_12: ; %self.loop
|
||||
; FLAT-NEXT: .LBB3_12: ; %self.loop
|
||||
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; FLAT-NEXT: s_mov_b64 vcc, vcc
|
||||
; FLAT-NEXT: s_cbranch_vccz BB3_12
|
||||
; FLAT-NEXT: BB3_13: ; %DummyReturnBlock
|
||||
; FLAT-NEXT: s_cbranch_vccz .LBB3_12
|
||||
; FLAT-NEXT: .LBB3_13: ; %DummyReturnBlock
|
||||
; FLAT-NEXT: s_endpgm
|
||||
entry:
|
||||
%cmp = icmp sgt i32 %c0, 0
|
||||
|
|
|
@ -12,14 +12,14 @@ define amdgpu_kernel void @test(i32 %arg, i32 %arg1) {
|
|||
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
|
||||
; CHECK-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
|
||||
; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1]
|
||||
; CHECK-NEXT: s_cbranch_vccnz BB0_3
|
||||
; CHECK-NEXT: s_cbranch_vccnz .LBB0_3
|
||||
; CHECK-NEXT: ; %bb.1: ; %bb9
|
||||
; CHECK-NEXT: s_and_b64 vcc, exec, 0
|
||||
; CHECK-NEXT: BB0_2: ; %bb10
|
||||
; CHECK-NEXT: .LBB0_2: ; %bb10
|
||||
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: s_mov_b64 vcc, vcc
|
||||
; CHECK-NEXT: s_cbranch_vccz BB0_2
|
||||
; CHECK-NEXT: BB0_3: ; %DummyReturnBlock
|
||||
; CHECK-NEXT: s_cbranch_vccz .LBB0_2
|
||||
; CHECK-NEXT: .LBB0_3: ; %DummyReturnBlock
|
||||
; CHECK-NEXT: s_endpgm
|
||||
bb:
|
||||
%tmp = icmp ne i32 %arg, 0
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue