forked from OSchip/llvm-project
[AMDGPU] Avoid redundant mode register writes
Summary: The SIModeRegister pass attempts to generate the minimal number of writes to the mode register. However it was failing to correctly deal with some loops, resulting in some redundant setreg instructions being inserted. This change amends the pass to avoid generating these redundant instructions. Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D82215
This commit is contained in:
parent
ab27603c6d
commit
c3b3b999ec
|
@ -108,7 +108,11 @@ public:
|
|||
// which is used in Phase 3 if we need to insert a mode change.
|
||||
MachineInstr *FirstInsertionPoint;
|
||||
|
||||
BlockData() : FirstInsertionPoint(nullptr){};
|
||||
// A flag to indicate whether an Exit value has been set (we can't tell by
|
||||
// examining the Exit value itself as all values may be valid results).
|
||||
bool ExitSet;
|
||||
|
||||
BlockData() : FirstInsertionPoint(nullptr), ExitSet(false){};
|
||||
};
|
||||
|
||||
namespace {
|
||||
|
@ -129,6 +133,8 @@ public:
|
|||
Status DefaultStatus =
|
||||
Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode));
|
||||
|
||||
bool Changed = false;
|
||||
|
||||
public:
|
||||
SIModeRegister() : MachineFunctionPass(ID) {}
|
||||
|
||||
|
@ -199,6 +205,7 @@ void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
|
|||
(Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) |
|
||||
(AMDGPU::Hwreg::ID_MODE << AMDGPU::Hwreg::ID_SHIFT_));
|
||||
++NumSetregInserted;
|
||||
Changed = true;
|
||||
InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);
|
||||
}
|
||||
}
|
||||
|
@ -323,22 +330,49 @@ void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
|
|||
// exit value is propagated.
|
||||
void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
|
||||
const SIInstrInfo *TII) {
|
||||
// BlockData *BI = BlockInfo[MBB.getNumber()];
|
||||
bool RevisitRequired = false;
|
||||
bool ExitSet = false;
|
||||
unsigned ThisBlock = MBB.getNumber();
|
||||
if (MBB.pred_empty()) {
|
||||
// There are no predecessors, so use the default starting status.
|
||||
BlockInfo[ThisBlock]->Pred = DefaultStatus;
|
||||
ExitSet = true;
|
||||
} else {
|
||||
// Build a status that is common to all the predecessors by intersecting
|
||||
// all the predecessor exit status values.
|
||||
// Mask bits (which represent the Mode bits with a known value) can only be
|
||||
// added by explicit SETREG instructions or the initial default value -
|
||||
// the intersection process may remove Mask bits.
|
||||
// If we find a predecessor that has not yet had an exit value determined
|
||||
// (this can happen for example if a block is its own predecessor) we defer
|
||||
// use of that value as the Mask will be all zero, and we will revisit this
|
||||
// block again later (unless the only predecessor without an exit value is
|
||||
// this block).
|
||||
MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end();
|
||||
MachineBasicBlock &PB = *(*P);
|
||||
BlockInfo[ThisBlock]->Pred = BlockInfo[PB.getNumber()]->Exit;
|
||||
unsigned PredBlock = PB.getNumber();
|
||||
if ((ThisBlock == PredBlock) && (std::next(P) == E)) {
|
||||
BlockInfo[ThisBlock]->Pred = DefaultStatus;
|
||||
ExitSet = true;
|
||||
} else if (BlockInfo[PredBlock]->ExitSet) {
|
||||
BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
|
||||
ExitSet = true;
|
||||
} else if (PredBlock != ThisBlock)
|
||||
RevisitRequired = true;
|
||||
|
||||
for (P = std::next(P); P != E; P = std::next(P)) {
|
||||
MachineBasicBlock *Pred = *P;
|
||||
BlockInfo[ThisBlock]->Pred = BlockInfo[ThisBlock]->Pred.intersect(
|
||||
BlockInfo[Pred->getNumber()]->Exit);
|
||||
unsigned PredBlock = Pred->getNumber();
|
||||
if (BlockInfo[PredBlock]->ExitSet) {
|
||||
if (BlockInfo[ThisBlock]->ExitSet) {
|
||||
BlockInfo[ThisBlock]->Pred =
|
||||
BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit);
|
||||
} else {
|
||||
BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
|
||||
}
|
||||
ExitSet = true;
|
||||
} else if (PredBlock != ThisBlock)
|
||||
RevisitRequired = true;
|
||||
}
|
||||
}
|
||||
Status TmpStatus =
|
||||
|
@ -354,6 +388,9 @@ void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
|
|||
Phase2List.push(&B);
|
||||
}
|
||||
}
|
||||
BlockInfo[ThisBlock]->ExitSet = ExitSet;
|
||||
if (RevisitRequired)
|
||||
Phase2List.push(&MBB);
|
||||
}
|
||||
|
||||
// In Phase 3 we revisit each block and if it has an insertion point defined we
|
||||
|
@ -361,7 +398,6 @@ void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
|
|||
// not we insert an appropriate setreg instruction to modify the Mode register.
|
||||
void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
|
||||
const SIInstrInfo *TII) {
|
||||
// BlockData *BI = BlockInfo[MBB.getNumber()];
|
||||
unsigned ThisBlock = MBB.getNumber();
|
||||
if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
|
||||
Status Delta =
|
||||
|
@ -402,5 +438,5 @@ bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
|
|||
|
||||
BlockInfo.clear();
|
||||
|
||||
return NumSetregInserted > 0;
|
||||
return Changed;
|
||||
}
|
||||
|
|
|
@ -457,3 +457,55 @@ body: |
|
|||
bb.4:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
# checks for bug where if a block is its own predecessor it could cause mode tracking
|
||||
# to produce the wrong mode, resulting in an unnecessary setreg
|
||||
# CHECK-LABEL: name: single_block_loop
|
||||
# CHECK-LABEL: bb.0:
|
||||
# CHECK-NOT: S_SETREG
|
||||
|
||||
name: single_block_loop
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
successors: %bb.1, %bb.2
|
||||
S_CBRANCH_VCCZ %bb.1, implicit $vcc
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
successors: %bb.3
|
||||
liveins: $vgpr1_vgpr2
|
||||
$vgpr1_vgpr2 = V_FRACT_F64_e32 killed $vgpr1_vgpr2, implicit $mode, implicit $exec
|
||||
S_BRANCH %bb.3
|
||||
|
||||
bb.3:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
# checks for a bug where if the first block is its own predecessor the initial mode was
|
||||
# not correctly propagated, resulting in an unnecessary setreg
|
||||
# CHECK-LABEL: name: first_block_loop
|
||||
# CHECK-LABEL: bb.0:
|
||||
# CHECK-NOT: S_SETREG
|
||||
|
||||
name: first_block_loop
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
successors: %bb.0, %bb.1
|
||||
S_CBRANCH_VCCZ %bb.0, implicit $vcc
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2
|
||||
liveins: $vgpr1_vgpr2
|
||||
$vgpr1_vgpr2 = V_FRACT_F64_e32 killed $vgpr1_vgpr2, implicit $mode, implicit $exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
|
Loading…
Reference in New Issue