[AMDGPU] Extend pre-emit peephole to handle redundantly masked VCC
Extend the pre-emit peephole for S_CBRANCH_VCC[N]Z to eliminate redundant S_AND operations against EXEC for V_CMP results in VCC. These occur after register allocation when VCC has been selected as the comparison destination.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D120202

parent 79787b903d
commit 565af157ef

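For illustration only (drawn from the and_cmp_vccz MIR test and its CHECK lines in this diff, not an additional change): when VCC is defined by a VALU compare and EXEC is not modified in between, the peephole simply erases the S_AND of EXEC into VCC and leaves the compare and branch untouched.

Before the pre-emit peephole:
    V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
    $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
    S_CBRANCH_VCCZ %bb.1, implicit killed $vcc

After:
    V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
    S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
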
@@ -74,6 +74,15 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
   // We end up with this pattern sometimes after basic block placement.
   // It happens while combining a block which assigns -1 or 0 to a saved mask
   // and another block which consumes that saved mask and then a branch.
+  //
+  // While searching this also performs the following substitution:
+  // vcc = V_CMP
+  // vcc = S_AND exec, vcc
+  // S_CBRANCH_VCC[N]Z
+  // =>
+  // vcc = V_CMP
+  // S_CBRANCH_VCC[N]Z
+
   bool Changed = false;
   MachineBasicBlock &MBB = *MI.getParent();
   const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
@@ -121,14 +130,27 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
     SReg = Op2.getReg();
     auto M = std::next(A);
     bool ReadsSreg = false;
+    bool ModifiesExec = false;
     for (; M != E; ++M) {
       if (M->definesRegister(SReg, TRI))
         break;
       if (M->modifiesRegister(SReg, TRI))
         return Changed;
       ReadsSreg |= M->readsRegister(SReg, TRI);
+      ModifiesExec |= M->modifiesRegister(ExecReg, TRI);
     }
-    if (M == E || !M->isMoveImmediate() || !M->getOperand(1).isImm() ||
+    if (M == E)
+      return Changed;
+    // If SReg is VCC and SReg definition is a VALU comparison.
+    // This means S_AND with EXEC is not required.
+    // Erase the S_AND and return.
+    // Note: isVOPC is used instead of isCompare to catch V_CMP_CLASS
+    if (A->getOpcode() == And && SReg == CondReg && !ModifiesExec &&
+        TII->isVOPC(*M) && TII->isVALU(*M)) {
+      A->eraseFromParent();
+      return true;
+    }
+    if (!M->isMoveImmediate() || !M->getOperand(1).isImm() ||
         (M->getOperand(1).getImm() != -1 && M->getOperand(1).getImm() != 0))
       return Changed;
     MaskValue = M->getOperand(1).getImm();

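A note on the isVOPC check above: matching any VOPC instruction rather than only isCompare also covers the V_CMP_CLASS family, which is exercised by the and_cmpclass_vccz MIR test below. Illustrative sketch of that case (again taken from the test, not new behaviour):

    V_CMP_CLASS_F32_e32 killed $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec
    $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc    ; erased by the peephole
    S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
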
@@ -623,7 +623,6 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
 ; GFX908-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[12:13]
 ; GFX908-NEXT: v_add_co_u32_e64 v14, s[2:3], v14, v6
 ; GFX908-NEXT: v_addc_co_u32_e64 v15, s[2:3], v15, v7, s[2:3]
-; GFX908-NEXT: s_and_b64 vcc, exec, vcc
 ; GFX908-NEXT: s_cbranch_vccz .LBB3_1
 ; GFX908-NEXT: .LBB3_5: ; %bb16
 ; GFX908-NEXT: ; Parent Loop BB3_2 Depth=1
@@ -751,7 +750,6 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
 ; GFX90A-NEXT: v_add_co_u32_e32 v16, vcc, v16, v10
 ; GFX90A-NEXT: v_addc_co_u32_e32 v17, vcc, v17, v11, vcc
 ; GFX90A-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[14:15]
-; GFX90A-NEXT: s_and_b64 vcc, exec, vcc
 ; GFX90A-NEXT: s_cbranch_vccz .LBB3_1
 ; GFX90A-NEXT: .LBB3_5: ; %bb16
 ; GFX90A-NEXT: ; Parent Loop BB3_2 Depth=1

@@ -82,7 +82,6 @@ define amdgpu_kernel void @s_add_co_br_user(i32 %i) {
 ; GFX7-NEXT: s_cmp_lg_u32 s0, 0
 ; GFX7-NEXT: s_addc_u32 s0, s2, 0
 ; GFX7-NEXT: v_cmp_ge_u32_e32 vcc, s0, v0
-; GFX7-NEXT: s_and_b64 vcc, exec, vcc
 ; GFX7-NEXT: s_cbranch_vccnz .LBB1_2
 ; GFX7-NEXT: ; %bb.1: ; %bb0
 ; GFX7-NEXT: v_mov_b32_e32 v0, 0
@@ -109,7 +108,6 @@ define amdgpu_kernel void @s_add_co_br_user(i32 %i) {
 ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
 ; GFX9-NEXT: s_addc_u32 s0, s2, 0
 ; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, s0, v0
-; GFX9-NEXT: s_and_b64 vcc, exec, vcc
 ; GFX9-NEXT: s_cbranch_vccnz .LBB1_2
 ; GFX9-NEXT: ; %bb.1: ; %bb0
 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
@@ -136,7 +134,6 @@ define amdgpu_kernel void @s_add_co_br_user(i32 %i) {
 ; GFX10-NEXT: s_cmpk_lg_u32 s1, 0x0
 ; GFX10-NEXT: s_addc_u32 s0, s0, 0
 ; GFX10-NEXT: v_cmp_ge_u32_e32 vcc_lo, s0, v0
-; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo
 ; GFX10-NEXT: s_cbranch_vccnz .LBB1_2
 ; GFX10-NEXT: ; %bb.1: ; %bb0
 ; GFX10-NEXT: v_mov_b32_e32 v0, 0

@@ -71,7 +71,6 @@ define amdgpu_kernel void @test_move_load_address_to_vgpr_d16_hi(i16 addrspace(1
 ; GCN-NEXT: global_load_short_d16_hi v0, v[2:3], off glc
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s2, v0
-; GCN-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-NEXT: s_cbranch_vccz .LBB1_1
 ; GCN-NEXT: ; %bb.2: ; %bb2
 ; GCN-NEXT: s_endpgm

@@ -431,7 +431,6 @@ define amdgpu_kernel void @udiv16_invariant_denom(i16 addrspace(1)* nocapture %a
 ; GFX9-NEXT: v_mad_f32 v0, -v0, v2, v8
 ; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s5, v4
 ; GFX9-NEXT: v_cmp_ge_f32_e64 s[0:1], |v0|, v2
-; GFX9-NEXT: s_and_b64 vcc, exec, vcc
 ; GFX9-NEXT: v_addc_co_u32_e64 v0, s[0:1], 0, v7, s[0:1]
 ; GFX9-NEXT: global_store_short v[5:6], v0, off
 ; GFX9-NEXT: s_cbranch_vccz .LBB4_1
@@ -516,7 +515,6 @@ define amdgpu_kernel void @urem16_invariant_denom(i16 addrspace(1)* nocapture %a
 ; GFX9-NEXT: v_addc_co_u32_e64 v8, s[2:3], 0, v10, s[2:3]
 ; GFX9-NEXT: v_mul_lo_u32 v8, v8, s7
 ; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], s4, v5
-; GFX9-NEXT: s_and_b64 vcc, exec, vcc
 ; GFX9-NEXT: v_addc_co_u32_e64 v6, s[0:1], v7, v6, s[0:1]
 ; GFX9-NEXT: v_sub_u32_e32 v0, v0, v8
 ; GFX9-NEXT: global_store_short v[5:6], v0, off
@@ -552,7 +550,6 @@ define amdgpu_kernel void @urem16_invariant_denom(i16 addrspace(1)* nocapture %a
 ; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v8, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x400, v4
 ; GFX10-NEXT: v_mul_lo_u32 v7, v7, s4
-; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo
 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v7
 ; GFX10-NEXT: global_store_short v[5:6], v0, off
 ; GFX10-NEXT: s_cbranch_vccz .LBB5_1
@@ -608,7 +605,6 @@ define amdgpu_kernel void @sdiv16_invariant_denom(i16 addrspace(1)* nocapture %a
 ; GFX9-NEXT: v_cmp_ge_f32_e64 s[0:1], |v7|, |v2|
 ; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s5, v4
 ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[0:1]
-; GFX9-NEXT: s_and_b64 vcc, exec, vcc
 ; GFX9-NEXT: v_add_u32_e32 v0, v8, v0
 ; GFX9-NEXT: global_store_short v[5:6], v0, off
 ; GFX9-NEXT: s_cbranch_vccz .LBB6_1
@@ -701,7 +697,6 @@ define amdgpu_kernel void @srem16_invariant_denom(i16 addrspace(1)* nocapture %a
 ; GFX9-NEXT: v_mov_b32_e32 v8, s5
 ; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s7, v4
 ; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], s4, v5
-; GFX9-NEXT: s_and_b64 vcc, exec, vcc
 ; GFX9-NEXT: v_addc_co_u32_e64 v6, s[0:1], v8, v6, s[0:1]
 ; GFX9-NEXT: v_sub_u32_e32 v0, v7, v0
 ; GFX9-NEXT: global_store_short v[5:6], v0, off
@@ -741,7 +736,6 @@ define amdgpu_kernel void @srem16_invariant_denom(i16 addrspace(1)* nocapture %a
 ; GFX10-NEXT: v_add_co_u32 v5, s0, s2, v5
 ; GFX10-NEXT: v_add_co_ci_u32_e64 v6, s0, s3, v6, s0
 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, s1
-; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo
 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, v7, v0
 ; GFX10-NEXT: global_store_short v[5:6], v0, off
 ; GFX10-NEXT: s_cbranch_vccz .LBB7_1

@@ -535,3 +535,119 @@ body: |
     S_CBRANCH_VCCZ %bb.1, implicit $vcc
     S_ENDPGM 0
 ...
+---
+# GCN-LABEL: name: and_cmp_vccz
+# GCN: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+# GCN-NOT: S_AND_
+# GCN: S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+name: and_cmp_vccz
+body: |
+  bb.0:
+    S_NOP 0
+
+  bb.1:
+    S_NOP 0
+
+  bb.2:
+    V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
+    S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+    S_ENDPGM 0
+...
+---
+# GCN-LABEL: name: and_cmp_vccnz
+# GCN: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+# GCN-NOT: S_AND_
+# GCN: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
+name: and_cmp_vccnz
+body: |
+  bb.0:
+    S_NOP 0
+
+  bb.1:
+    S_NOP 0
+
+  bb.2:
+    V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
+    S_ENDPGM 0
+...
+---
+# GCN-LABEL: name: andn2_cmp_vccz
+# GCN: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+# GCN: $vcc = S_ANDN2_B64 $exec, $vcc, implicit-def dead $scc
+# GCN: S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+name: andn2_cmp_vccz
+body: |
+  bb.0:
+    S_NOP 0
+
+  bb.1:
+    S_NOP 0
+
+  bb.2:
+    V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    $vcc = S_ANDN2_B64 $exec, $vcc, implicit-def dead $scc
+    S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+    S_ENDPGM 0
+...
+---
+# GCN-LABEL: name: and_cmpclass_vccz
+# GCN: V_CMP_CLASS_F32_e32 killed $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec
+# GCN-NOT: S_AND_
+# GCN: S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+name: and_cmpclass_vccz
+body: |
+  bb.0:
+    S_NOP 0
+
+  bb.1:
+    S_NOP 0
+
+  bb.2:
+    V_CMP_CLASS_F32_e32 killed $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
+    S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+    S_ENDPGM 0
+...
+---
+# GCN-LABEL: name: and_cmpx_vccz
+# GCN: V_CMPX_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit-def $exec, implicit $exec
+# GCN-NOT: S_AND_
+# GCN: S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+name: and_cmpx_vccz
+body: |
+  bb.0:
+    S_NOP 0
+
+  bb.1:
+    S_NOP 0
+
+  bb.2:
+    V_CMPX_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit-def $exec, implicit $exec
+    $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
+    S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+    S_ENDPGM 0
+...
+---
+# GCN-LABEL: name: and_or_cmp_vccz
+# GCN: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+# GCN: $exec = S_OR_B64 $exec, $sgpr0_sgpr1, implicit-def dead $scc
+# GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
+# GCN: S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+name: and_or_cmp_vccz
+body: |
+  bb.0:
+    S_NOP 0
+
+  bb.1:
+    S_NOP 0
+
+  bb.2:
+    V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    $exec = S_OR_B64 $exec, $sgpr0_sgpr1, implicit-def dead $scc
+    $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
+    S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+    S_ENDPGM 0
+...

@@ -195,7 +195,6 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: s_mov_b64 s[6:7], -1
 ; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 1, v1
-; GCN-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-NEXT: ; implicit-def: $sgpr8_sgpr9
 ; GCN-NEXT: s_mov_b64 s[10:11], -1
 ; GCN-NEXT: s_cbranch_vccnz .LBB1_6
@@ -203,7 +202,6 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
 ; GCN-NEXT: s_mov_b64 s[6:7], -1
 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
-; GCN-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-NEXT: s_mov_b64 s[8:9], -1
 ; GCN-NEXT: s_cbranch_vccz .LBB1_5
 ; GCN-NEXT: ; %bb.4: ; %case1
@@ -223,7 +221,6 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
 ; GCN-NEXT: ; %bb.7: ; %LeafBlock
 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GCN-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-NEXT: s_mov_b64 s[8:9], -1
 ; GCN-NEXT: s_cbranch_vccz .LBB1_1
 ; GCN-NEXT: ; %bb.8: ; %case0

@@ -151,7 +151,6 @@ define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %a
 ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 8, v0
-; GCN-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-NEXT: s_cbranch_vccnz .LBB1_6
 ; GCN-NEXT: ; %bb.1: ; %bb14.lr.ph
 ; GCN-NEXT: s_load_dword s4, s[0:1], 0x0
@@ -176,7 +175,6 @@ define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %a
 ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 8, v0
-; GCN-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-NEXT: s_cbranch_vccnz .LBB1_4
 ; GCN-NEXT: ; %bb.5: ; %bb21
 ; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1

@@ -438,7 +438,6 @@ entry:
 ; {{^}}sopc_vopc_legalize_bug:
 ; GCN: s_load_dword [[SGPR:s[0-9]+]]
 ; GCN: v_cmp_le_u32_e32 vcc, [[SGPR]], v{{[0-9]+}}
-; GCN: s_and_b64 vcc, exec, vcc
 ; GCN: s_cbranch_vccnz [[EXIT:.L[A-Z0-9_]+]]
 ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
 ; GCN-NOHSA: buffer_store_dword [[ONE]]

@@ -217,7 +217,6 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ; GCN-IR-NEXT: s_addc_u32 s11, s11, 0
 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[10:11], v[0:1]
 ; GCN-IR-NEXT: s_mov_b64 s[14:15], s[8:9]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_3
 ; GCN-IR-NEXT: .LBB0_4: ; %Flow6
 ; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[12:13], 1
@@ -1071,7 +1070,6 @@ define amdgpu_kernel void @s_test_sdiv24_48(i48 addrspace(1)* %out, i48 %x, i48
 ; GCN-IR-NEXT: s_addc_u32 s11, s11, 0
 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[10:11], v[0:1]
 ; GCN-IR-NEXT: s_mov_b64 s[14:15], s[8:9]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB9_3
 ; GCN-IR-NEXT: .LBB9_4: ; %Flow3
 ; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[12:13], 1
@@ -1283,7 +1281,6 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-IR-NEXT: s_addc_u32 s9, s9, 0
 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[8:9], v[0:1]
 ; GCN-IR-NEXT: s_mov_b64 s[12:13], s[6:7]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_3
 ; GCN-IR-NEXT: .LBB10_4: ; %Flow5
 ; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[10:11], 1

@@ -750,7 +750,6 @@ define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
 ; SI-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
 ; SI-NEXT: s_waitcnt vmcnt(0)
 ; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; SI-NEXT: s_and_b64 vcc, exec, vcc
 ; SI-NEXT: s_cbranch_vccnz .LBB10_2
 ; SI-NEXT: .LBB10_4: ; %Flow1
 ; SI-NEXT: s_or_b64 exec, exec, s[4:5]
@@ -796,7 +795,6 @@ define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
 ; GFX10-WAVE64-NEXT: global_load_dword v0, v[0:1], off glc dlc
 ; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, vcc
 ; GFX10-WAVE64-NEXT: s_cbranch_vccnz .LBB10_1
 ; GFX10-WAVE64-NEXT: .LBB10_3: ; %Flow1
 ; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[2:3]
@@ -840,7 +838,6 @@ define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
 ; GFX10-WAVE32-NEXT: global_load_dword v0, v[0:1], off glc dlc
 ; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo
 ; GFX10-WAVE32-NEXT: s_cbranch_vccnz .LBB10_1
 ; GFX10-WAVE32-NEXT: .LBB10_3: ; %Flow1
 ; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s1
@@ -901,7 +898,6 @@ define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
 ; SI-NEXT: v_mov_b32_e32 v0, 4.0
 ; SI-NEXT: .LBB11_3: ; %phibb
 ; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
-; SI-NEXT: s_and_b64 vcc, exec, vcc
 ; SI-NEXT: s_cbranch_vccz .LBB11_5
 ; SI-NEXT: ; %bb.4: ; %bb10
 ; SI-NEXT: s_mov_b32 s3, 0xf000
@@ -934,7 +930,6 @@ define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
 ; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
 ; GFX10-WAVE64-NEXT: .LBB11_3: ; %phibb
 ; GFX10-WAVE64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
-; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, vcc
 ; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB11_5
 ; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb10
 ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 9
@@ -965,7 +960,6 @@ define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
 ; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
 ; GFX10-WAVE32-NEXT: .LBB11_3: ; %phibb
 ; GFX10-WAVE32-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
-; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo
 ; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB11_5
 ; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb10
 ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 9

@@ -189,7 +189,6 @@ define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ; GCN-IR-NEXT: s_addc_u32 s9, s9, 0
 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[8:9], v[0:1]
 ; GCN-IR-NEXT: s_mov_b64 s[12:13], s[6:7]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_3
 ; GCN-IR-NEXT: .LBB0_4: ; %Flow6
 ; GCN-IR-NEXT: s_lshl_b64 s[8:9], s[10:11], 1
@@ -1081,7 +1080,6 @@ define amdgpu_kernel void @s_test_srem33_64(i64 addrspace(1)* %out, i64 %x, i64
 ; GCN-IR-NEXT: s_addc_u32 s11, s11, 0
 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[10:11], v[0:1]
 ; GCN-IR-NEXT: s_mov_b64 s[14:15], s[6:7]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_3
 ; GCN-IR-NEXT: .LBB8_4: ; %Flow6
 ; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[12:13], 1
@@ -1243,7 +1241,6 @@ define amdgpu_kernel void @s_test_srem24_48(i48 addrspace(1)* %out, i48 %x, i48
 ; GCN-IR-NEXT: s_addc_u32 s11, s11, 0
 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[10:11], v[0:1]
 ; GCN-IR-NEXT: s_mov_b64 s[14:15], s[8:9]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB9_3
 ; GCN-IR-NEXT: .LBB9_4: ; %Flow3
 ; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[12:13], 1
@@ -1457,7 +1454,6 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-IR-NEXT: s_addc_u32 s7, s7, 0
 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN-IR-NEXT: s_mov_b64 s[10:11], s[2:3]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_3
 ; GCN-IR-NEXT: .LBB10_4: ; %Flow5
 ; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[8:9], 1

@@ -363,7 +363,6 @@ define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %
 ; NOHSA-TRAP-GFX900-V2-NEXT: global_load_dword v1, v0, s[0:1] glc
 ; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
 ; NOHSA-TRAP-GFX900-V2-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
-; NOHSA-TRAP-GFX900-V2-NEXT: s_and_b64 vcc, exec, vcc
 ; NOHSA-TRAP-GFX900-V2-NEXT: s_cbranch_vccz .LBB1_2
 ; NOHSA-TRAP-GFX900-V2-NEXT: ; %bb.1: ; %ret
 ; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 3
@@ -381,7 +380,6 @@ define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %
 ; NOHSA-TRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc
 ; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
 ; NOHSA-TRAP-GFX900-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
-; NOHSA-TRAP-GFX900-V3-NEXT: s_and_b64 vcc, exec, vcc
 ; NOHSA-TRAP-GFX900-V3-NEXT: s_cbranch_vccz .LBB1_2
 ; NOHSA-TRAP-GFX900-V3-NEXT: ; %bb.1: ; %ret
 ; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 3
@@ -399,7 +397,6 @@ define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %
 ; NOHSA-TRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc
 ; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
 ; NOHSA-TRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
-; NOHSA-TRAP-GFX900-V4-NEXT: s_and_b64 vcc, exec, vcc
 ; NOHSA-TRAP-GFX900-V4-NEXT: s_cbranch_vccz .LBB1_2
 ; NOHSA-TRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret
 ; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3
@@ -486,7 +483,6 @@ define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %
 ; HSA-TRAP-GFX803-V2-NEXT: flat_load_dword v0, v[0:1] glc
 ; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt vmcnt(0)
 ; HSA-TRAP-GFX803-V2-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0
-; HSA-TRAP-GFX803-V2-NEXT: s_and_b64 vcc, exec, vcc
 ; HSA-TRAP-GFX803-V2-NEXT: s_cbranch_vccz .LBB1_2
 ; HSA-TRAP-GFX803-V2-NEXT: ; %bb.1: ; %ret
 ; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v0, s0
@@ -508,7 +504,6 @@ define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %
 ; HSA-TRAP-GFX803-V3-NEXT: flat_load_dword v0, v[0:1] glc
 ; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0)
 ; HSA-TRAP-GFX803-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0
-; HSA-TRAP-GFX803-V3-NEXT: s_and_b64 vcc, exec, vcc
 ; HSA-TRAP-GFX803-V3-NEXT: s_cbranch_vccz .LBB1_2
 ; HSA-TRAP-GFX803-V3-NEXT: ; %bb.1: ; %ret
 ; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s0
@@ -530,7 +525,6 @@ define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %
 ; HSA-TRAP-GFX803-V4-NEXT: flat_load_dword v0, v[0:1] glc
 ; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0)
 ; HSA-TRAP-GFX803-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0
-; HSA-TRAP-GFX803-V4-NEXT: s_and_b64 vcc, exec, vcc
 ; HSA-TRAP-GFX803-V4-NEXT: s_cbranch_vccz .LBB1_2
 ; HSA-TRAP-GFX803-V4-NEXT: ; %bb.1: ; %ret
 ; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s0
@@ -619,7 +613,6 @@ define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %
 ; HSA-TRAP-GFX900-V2-NEXT: global_load_dword v1, v0, s[0:1] glc
 ; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
 ; HSA-TRAP-GFX900-V2-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
-; HSA-TRAP-GFX900-V2-NEXT: s_and_b64 vcc, exec, vcc
 ; HSA-TRAP-GFX900-V2-NEXT: s_cbranch_vccz .LBB1_2
 ; HSA-TRAP-GFX900-V2-NEXT: ; %bb.1: ; %ret
 ; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 3
@@ -638,7 +631,6 @@ define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %
 ; HSA-TRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc
 ; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
 ; HSA-TRAP-GFX900-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
-; HSA-TRAP-GFX900-V3-NEXT: s_and_b64 vcc, exec, vcc
 ; HSA-TRAP-GFX900-V3-NEXT: s_cbranch_vccz .LBB1_2
 ; HSA-TRAP-GFX900-V3-NEXT: ; %bb.1: ; %ret
 ; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 3
@@ -657,7 +649,6 @@ define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %
 ; HSA-TRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc
 ; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
 ; HSA-TRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
-; HSA-TRAP-GFX900-V4-NEXT: s_and_b64 vcc, exec, vcc
 ; HSA-TRAP-GFX900-V4-NEXT: s_cbranch_vccz .LBB1_2
 ; HSA-TRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret
 ; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3
@@ -743,7 +734,6 @@ define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %
 ; HSA-NOTRAP-GFX900-V2-NEXT: global_load_dword v1, v0, s[0:1] glc
 ; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
 ; HSA-NOTRAP-GFX900-V2-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
-; HSA-NOTRAP-GFX900-V2-NEXT: s_and_b64 vcc, exec, vcc
 ; HSA-NOTRAP-GFX900-V2-NEXT: s_cbranch_vccz .LBB1_2
 ; HSA-NOTRAP-GFX900-V2-NEXT: ; %bb.1: ; %ret
 ; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 3
@@ -761,7 +751,6 @@ define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %
 ; HSA-NOTRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc
 ; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
 ; HSA-NOTRAP-GFX900-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
-; HSA-NOTRAP-GFX900-V3-NEXT: s_and_b64 vcc, exec, vcc
 ; HSA-NOTRAP-GFX900-V3-NEXT: s_cbranch_vccz .LBB1_2
 ; HSA-NOTRAP-GFX900-V3-NEXT: ; %bb.1: ; %ret
 ; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 3
@@ -779,7 +768,6 @@ define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %
 ; HSA-NOTRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc
 ; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
 ; HSA-NOTRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
-; HSA-NOTRAP-GFX900-V4-NEXT: s_and_b64 vcc, exec, vcc
 ; HSA-NOTRAP-GFX900-V4-NEXT: s_cbranch_vccz .LBB1_2
 ; HSA-NOTRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret
 ; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3

@@ -190,7 +190,6 @@ define amdgpu_kernel void @s_test_udiv_i64(i64 addrspace(1)* %out, i64 %x, i64 %
 ; GCN-IR-NEXT: s_addc_u32 s3, s3, 0
 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN-IR-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_3
 ; GCN-IR-NEXT: .LBB0_4: ; %Flow6
 ; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[8:9], 1
@@ -879,7 +878,6 @@ define amdgpu_kernel void @s_test_udiv24_i48(i48 addrspace(1)* %out, i48 %x, i48
 ; GCN-IR-NEXT: s_addc_u32 s7, s7, 0
 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN-IR-NEXT: s_mov_b64 s[10:11], s[0:1]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_3
 ; GCN-IR-NEXT: .LBB7_4: ; %Flow3
 ; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[8:9], 1
@@ -1070,7 +1068,6 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-IR-NEXT: s_addc_u32 s7, s7, 0
 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN-IR-NEXT: s_mov_b64 s[10:11], s[4:5]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_3
 ; GCN-IR-NEXT: .LBB8_4: ; %Flow5
 ; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[8:9], 1
@@ -1527,7 +1524,6 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-IR-NEXT: s_addc_u32 s3, s3, 0
 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN-IR-NEXT: s_mov_b64 s[8:9], s[4:5]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB11_3
 ; GCN-IR-NEXT: .LBB11_4: ; %Flow5
 ; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[6:7], 1

@@ -118,7 +118,6 @@ done:
 ; Using a floating-point value in an integer compare will cause the compare to
 ; be selected for the SALU and then later moved to the VALU.
 ; GCN: v_cmp_ne_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 5, [[CMP]]
-; GCN: s_and_b64 vcc, exec, [[COND]]
 ; GCN: s_cbranch_vccnz [[ENDIF_LABEL:.L[0-9_A-Za-z]+]]
 ; GCN: buffer_store_dword
 ; GCN: [[ENDIF_LABEL]]:
@@ -143,7 +142,6 @@ endif:
 ; Using a floating-point value in an integer compare will cause the compare to
 ; be selected for the SALU and then later moved to the VALU.
 ; GCN: v_cmp_gt_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 6, [[CMP]]
-; GCN: s_and_b64 vcc, exec, [[COND]]
 ; GCN: s_cbranch_vccnz [[ENDIF_LABEL:.L[0-9_A-Za-z]+]]
 ; GCN: buffer_store_dword
 ; GCN: [[ENDIF_LABEL]]:

@@ -19,11 +19,9 @@ define hidden void @widget() {
 ; GCN-NEXT: flat_load_dword v0, v[0:1]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 21, v0
-; GCN-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-NEXT: s_cbranch_vccz .LBB0_3
 ; GCN-NEXT: ; %bb.1: ; %bb4
 ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 9, v0
-; GCN-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-NEXT: s_cbranch_vccnz .LBB0_4
 ; GCN-NEXT: ; %bb.2: ; %bb7
 ; GCN-NEXT: s_getpc_b64 s[16:17]
@@ -33,7 +31,6 @@ define hidden void @widget() {
 ; GCN-NEXT: s_branch .LBB0_7
 ; GCN-NEXT: .LBB0_3: ; %bb2
 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 21, v0
-; GCN-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-NEXT: s_cbranch_vccnz .LBB0_6
 ; GCN-NEXT: .LBB0_4: ; %bb9
 ; GCN-NEXT: s_getpc_b64 s[16:17]

@@ -189,7 +189,6 @@ define amdgpu_kernel void @s_test_urem_i64(i64 addrspace(1)* %out, i64 %x, i64 %
 ; GCN-IR-NEXT: s_addc_u32 s9, s9, 0
 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[8:9], v[0:1]
 ; GCN-IR-NEXT: s_mov_b64 s[12:13], s[6:7]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_3
 ; GCN-IR-NEXT: .LBB0_4: ; %Flow6
 ; GCN-IR-NEXT: s_lshl_b64 s[8:9], s[10:11], 1
@@ -887,7 +886,6 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-IR-NEXT: s_addc_u32 s7, s7, 0
 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN-IR-NEXT: s_mov_b64 s[10:11], s[4:5]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB6_3
 ; GCN-IR-NEXT: .LBB6_4: ; %Flow5
 ; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[8:9], 1
@@ -1077,7 +1075,6 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-IR-NEXT: s_addc_u32 s7, s7, 0
 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN-IR-NEXT: s_mov_b64 s[10:11], s[4:5]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_3
 ; GCN-IR-NEXT: .LBB7_4: ; %Flow5
 ; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[8:9], 1

@@ -495,9 +495,7 @@ entry:
 
 ; GCN-LABEL: {{^}}test_br_cc_f16:
 ; GFX1032: v_cmp_nlt_f16_e32 vcc_lo,
-; GFX1032: s_and_b32 vcc_lo, exec_lo, vcc_lo
 ; GFX1064: v_cmp_nlt_f16_e32 vcc,
-; GFX1064: s_and_b64 vcc, exec, vcc{{$}}
 ; GCN-NEXT: s_cbranch_vccnz
 define amdgpu_kernel void @test_br_cc_f16(
 half addrspace(1)* %r,

@@ -1864,7 +1864,6 @@ define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind {
 ; GFX9-W64-NEXT: v_mov_b32_e32 v1, v5
 ; GFX9-W64-NEXT: v_mov_b32_e32 v2, v6
 ; GFX9-W64-NEXT: v_mov_b32_e32 v3, v7
-; GFX9-W64-NEXT: s_and_b64 vcc, exec, vcc
 ; GFX9-W64-NEXT: s_cbranch_vccz .LBB31_1
 ; GFX9-W64-NEXT: ; %bb.3:
 ; GFX9-W64-NEXT: s_mov_b64 s[2:3], -1
@@ -1914,7 +1913,6 @@ define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind {
 ; GFX10-W32-NEXT: v_mov_b32_e32 v6, v2
 ; GFX10-W32-NEXT: v_mov_b32_e32 v5, v1
 ; GFX10-W32-NEXT: v_mov_b32_e32 v4, v0
-; GFX10-W32-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo
 ; GFX10-W32-NEXT: s_cbranch_vccz .LBB31_1
 ; GFX10-W32-NEXT: ; %bb.3:
 ; GFX10-W32-NEXT: s_mov_b32 s1, -1