forked from OSchip/llvm-project
[AMDGPU] Translate s_and/s_andn2 to s_mov in vcc optimisation
When SCC is dead but VCC is still required, replace s_and / s_andn2 with an s_mov into VCC when the mask value is 0 or -1. Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D83850
This commit is contained in:
parent
b128f719a4
commit
3a18665748
|
@ -70,6 +70,7 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
|
|||
const unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
|
||||
const unsigned And = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
|
||||
const unsigned AndN2 = IsWave32 ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_ANDN2_B64;
|
||||
const unsigned Mov = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
|
||||
|
||||
MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(),
|
||||
E = MBB.rend();
|
||||
|
@ -136,9 +137,20 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
|
|||
if (A->getOpcode() == AndN2)
|
||||
MaskValue = ~MaskValue;
|
||||
|
||||
if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC) &&
|
||||
MI.killsRegister(CondReg, TRI))
|
||||
if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC)) {
|
||||
if (!MI.killsRegister(CondReg, TRI)) {
|
||||
// Replace AND with MOV
|
||||
if (MaskValue == 0) {
|
||||
BuildMI(*A->getParent(), *A, A->getDebugLoc(), TII->get(Mov), CondReg)
|
||||
.addImm(0);
|
||||
} else {
|
||||
BuildMI(*A->getParent(), *A, A->getDebugLoc(), TII->get(Mov), CondReg)
|
||||
.addReg(ExecReg);
|
||||
}
|
||||
}
|
||||
// Remove AND instruction
|
||||
A->eraseFromParent();
|
||||
}
|
||||
|
||||
bool IsVCCZ = MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ;
|
||||
if (SReg == ExecReg) {
|
||||
|
|
|
@ -158,7 +158,7 @@ define amdgpu_kernel void @infinite_loop_nest_ret(i32 addrspace(1)* %out) {
|
|||
; SI-NEXT: ; %bb.4: ; %loop.exit.guard
|
||||
; SI-NEXT: ; in Loop: Header=BB3_2 Depth=1
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: s_and_b64 vcc, exec, 0
|
||||
; SI-NEXT: s_mov_b64 vcc, 0
|
||||
; SI-NEXT: s_branch BB3_2
|
||||
; SI-NEXT: BB3_5: ; %UnifiedReturnBlock
|
||||
; SI-NEXT: s_endpgm
|
||||
|
|
|
@ -415,3 +415,123 @@ body: |
|
|||
S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: and_0_mov
|
||||
# GCN: bb.2:
|
||||
# GCN-NOT: S_AND
|
||||
# GCN: $vcc = S_MOV_B64 0
|
||||
# GCN-NEXT: S_BRANCH %bb.1
|
||||
name: and_0_mov
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$sgpr0_sgpr1 = S_MOV_B64 0
|
||||
$vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
|
||||
S_CBRANCH_VCCZ %bb.1, implicit $vcc
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: andn2_m1_mov
|
||||
# GCN: bb.2:
|
||||
# GCN-NOT: S_ANDN2
|
||||
# GCN: $vcc = S_MOV_B64 0
|
||||
# GCN-NEXT: S_BRANCH %bb.1
|
||||
name: andn2_m1_mov
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$sgpr0_sgpr1 = S_MOV_B64 -1
|
||||
$vcc = S_ANDN2_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
|
||||
S_CBRANCH_VCCZ %bb.1, implicit $vcc
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: and_m1_mov
|
||||
# GCN: bb.2:
|
||||
# GCN-NOT: S_AND
|
||||
# GCN: $vcc = S_MOV_B64 $exec
|
||||
# GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
name: and_m1_mov
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$sgpr0_sgpr1 = S_MOV_B64 -1
|
||||
$vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
|
||||
S_CBRANCH_VCCZ %bb.1, implicit $vcc
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: andn2_0_mov
|
||||
# GCN: bb.2:
|
||||
# GCN-NOT: S_ANDN2
|
||||
# GCN: $vcc = S_MOV_B64 $exec
|
||||
# GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
name: andn2_0_mov
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$sgpr0_sgpr1 = S_MOV_B64 0
|
||||
$vcc = S_ANDN2_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
|
||||
S_CBRANCH_VCCZ %bb.1, implicit $vcc
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: and_0_scc_req
|
||||
# GCN: bb.2:
|
||||
# GCN-NOT: S_MOV_
|
||||
# GCN: S_AND_
|
||||
# GCN-NEXT: S_BRANCH %bb.1
|
||||
name: and_0_scc_req
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$sgpr0_sgpr1 = S_MOV_B64 0
|
||||
$vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
S_CBRANCH_VCCZ %bb.1, implicit $vcc
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: andn2_m1_scc_req
|
||||
# GCN: bb.2:
|
||||
# GCN-NOT: S_MOV_
|
||||
# GCN: S_ANDN2_
|
||||
# GCN-NEXT: S_BRANCH %bb.1
|
||||
name: andn2_m1_scc_req
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$sgpr0_sgpr1 = S_MOV_B64 -1
|
||||
$vcc = S_ANDN2_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
S_CBRANCH_VCCZ %bb.1, implicit $vcc
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
|
|
@ -836,8 +836,8 @@ define amdgpu_ps void @test_wqm_vote(float %a) {
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_branch_true:
|
||||
; GFX1032: s_and_b32 vcc_lo, exec_lo, -1
|
||||
; GFX1064: s_and_b64 vcc, exec, -1
|
||||
; GFX1032: s_mov_b32 vcc_lo, exec_lo
|
||||
; GFX1064: s_mov_b64 vcc, exec
|
||||
define amdgpu_kernel void @test_branch_true() #2 {
|
||||
entry:
|
||||
br i1 true, label %for.end, label %for.body.lr.ph
|
||||
|
|
Loading…
Reference in New Issue