[AMDGPU] Avoid redundant mode register writes

Summary:
The SIModeRegister pass attempts to generate the minimal number of
writes to the mode register. However it was failing to correctly
deal with some loops, resulting in some redundant setreg instructions
being inserted.

This change amends the pass to avoid generating these redundant
instructions.

Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D82215
This commit is contained in:
Tim Corringham 2020-06-19 18:23:56 +01:00
parent ab27603c6d
commit c3b3b999ec
2 changed files with 95 additions and 7 deletions

View File

@ -108,7 +108,11 @@ public:
// which is used in Phase 3 if we need to insert a mode change.
MachineInstr *FirstInsertionPoint;
BlockData() : FirstInsertionPoint(nullptr){};
// A flag to indicate whether an Exit value has been set (we can't tell by
// examining the Exit value itself as all values may be valid results).
bool ExitSet;
BlockData() : FirstInsertionPoint(nullptr), ExitSet(false){};
};
namespace {
@ -129,6 +133,8 @@ public:
Status DefaultStatus =
Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode));
bool Changed = false;
public:
SIModeRegister() : MachineFunctionPass(ID) {}
@ -199,6 +205,7 @@ void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
(Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) |
(AMDGPU::Hwreg::ID_MODE << AMDGPU::Hwreg::ID_SHIFT_));
++NumSetregInserted;
Changed = true;
InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);
}
}
@ -323,22 +330,49 @@ void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
// exit value is propagated.
void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
const SIInstrInfo *TII) {
// BlockData *BI = BlockInfo[MBB.getNumber()];
bool RevisitRequired = false;
bool ExitSet = false;
unsigned ThisBlock = MBB.getNumber();
if (MBB.pred_empty()) {
// There are no predecessors, so use the default starting status.
BlockInfo[ThisBlock]->Pred = DefaultStatus;
ExitSet = true;
} else {
// Build a status that is common to all the predecessors by intersecting
// all the predecessor exit status values.
// Mask bits (which represent the Mode bits with a known value) can only be
// added by explicit SETREG instructions or the initial default value -
// the intersection process may remove Mask bits.
// If we find a predecessor that has not yet had an exit value determined
// (this can happen for example if a block is its own predecessor) we defer
// use of that value as the Mask will be all zero, and we will revisit this
// block again later (unless the only predecessor without an exit value is
// this block).
MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end();
MachineBasicBlock &PB = *(*P);
BlockInfo[ThisBlock]->Pred = BlockInfo[PB.getNumber()]->Exit;
unsigned PredBlock = PB.getNumber();
if ((ThisBlock == PredBlock) && (std::next(P) == E)) {
BlockInfo[ThisBlock]->Pred = DefaultStatus;
ExitSet = true;
} else if (BlockInfo[PredBlock]->ExitSet) {
BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
ExitSet = true;
} else if (PredBlock != ThisBlock)
RevisitRequired = true;
for (P = std::next(P); P != E; P = std::next(P)) {
MachineBasicBlock *Pred = *P;
BlockInfo[ThisBlock]->Pred = BlockInfo[ThisBlock]->Pred.intersect(
BlockInfo[Pred->getNumber()]->Exit);
unsigned PredBlock = Pred->getNumber();
if (BlockInfo[PredBlock]->ExitSet) {
if (BlockInfo[ThisBlock]->ExitSet) {
BlockInfo[ThisBlock]->Pred =
BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit);
} else {
BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
}
ExitSet = true;
} else if (PredBlock != ThisBlock)
RevisitRequired = true;
}
}
Status TmpStatus =
@ -354,6 +388,9 @@ void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
Phase2List.push(&B);
}
}
BlockInfo[ThisBlock]->ExitSet = ExitSet;
if (RevisitRequired)
Phase2List.push(&MBB);
}
// In Phase 3 we revisit each block and if it has an insertion point defined we
@ -361,7 +398,6 @@ void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
// not we insert an appropriate setreg instruction to modify the Mode register.
void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
const SIInstrInfo *TII) {
// BlockData *BI = BlockInfo[MBB.getNumber()];
unsigned ThisBlock = MBB.getNumber();
if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
Status Delta =
@ -402,5 +438,5 @@ bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
BlockInfo.clear();
return NumSetregInserted > 0;
return Changed;
}

View File

@ -457,3 +457,55 @@ body: |
bb.4:
S_ENDPGM 0
...
---
# checks for bug where if a block is its own predecessor it could cause mode tracking
# to produce the wrong mode, resulting in an unnecessary setreg
# CHECK-LABEL: name: single_block_loop
# CHECK-LABEL: bb.0:
# CHECK-NOT: S_SETREG
name: single_block_loop
body: |
bb.0:
successors: %bb.1
S_BRANCH %bb.1
bb.1:
successors: %bb.1, %bb.2
S_CBRANCH_VCCZ %bb.1, implicit $vcc
S_BRANCH %bb.2
bb.2:
successors: %bb.3
liveins: $vgpr1_vgpr2
$vgpr1_vgpr2 = V_FRACT_F64_e32 killed $vgpr1_vgpr2, implicit $mode, implicit $exec
S_BRANCH %bb.3
bb.3:
S_ENDPGM 0
...
---
# checks for a bug where if the first block is its own predecessor the initial mode was
# not correctly propagated, resulting in an unnecessary setreg
# CHECK-LABEL: name: first_block_loop
# CHECK-LABEL: bb.0:
# CHECK-NOT: S_SETREG
name: first_block_loop
body: |
bb.0:
successors: %bb.0, %bb.1
S_CBRANCH_VCCZ %bb.0, implicit $vcc
S_BRANCH %bb.1
bb.1:
successors: %bb.2
liveins: $vgpr1_vgpr2
$vgpr1_vgpr2 = V_FRACT_F64_e32 killed $vgpr1_vgpr2, implicit $mode, implicit $exec
S_BRANCH %bb.2
bb.2:
S_ENDPGM 0
...