forked from OSchip/llvm-project
AMDGPU: Change API for checking for exec modification
Invert the name and return value to better reflect the imprecise, conservative nature of the check. Require the caller to pass in the DefMI, since it's known in both users and looking it up could fail for an arbitrary vreg. Allow specifying a specific user instruction. Scan through use instructions instead of use operands. Add scan thresholds instead of searching without bound. Stop using a set to track seen uses: I didn't understand this usage, or why it would not check the last use, and I don't think the use list has any particular order. llvm-svn: 363675
This commit is contained in:
parent
c99d9aee00
commit
f39f3bd056
|
@ -331,7 +331,7 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
|
||||||
auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
|
auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
|
||||||
assert(DstOpnd && DstOpnd->isReg());
|
assert(DstOpnd && DstOpnd->isReg());
|
||||||
auto DPPMovReg = DstOpnd->getReg();
|
auto DPPMovReg = DstOpnd->getReg();
|
||||||
if (!isEXECMaskConstantBetweenDefAndUses(DPPMovReg, *MRI)) {
|
if (execMayBeModifiedBeforeUse(*MRI, DPPMovReg, MovMI)) {
|
||||||
LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
|
LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
|
||||||
" for all uses\n");
|
" for all uses\n");
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -516,8 +516,10 @@ void SIFoldOperands::foldOperand(
|
||||||
// =>
|
// =>
|
||||||
// %sgpr = S_MOV_B32 imm
|
// %sgpr = S_MOV_B32 imm
|
||||||
if (FoldingImm) {
|
if (FoldingImm) {
|
||||||
if (!isEXECMaskConstantBetweenDefAndUses(
|
if (execMayBeModifiedBeforeUse(*MRI,
|
||||||
UseMI->getOperand(UseOpIdx).getReg(), *MRI))
|
UseMI->getOperand(UseOpIdx).getReg(),
|
||||||
|
*OpToFold.getParent(),
|
||||||
|
UseMI))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
|
UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
|
||||||
|
@ -527,8 +529,10 @@ void SIFoldOperands::foldOperand(
|
||||||
}
|
}
|
||||||
|
|
||||||
if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
|
if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
|
||||||
if (!isEXECMaskConstantBetweenDefAndUses(
|
if (execMayBeModifiedBeforeUse(*MRI,
|
||||||
UseMI->getOperand(UseOpIdx).getReg(), *MRI))
|
UseMI->getOperand(UseOpIdx).getReg(),
|
||||||
|
*OpToFold.getParent(),
|
||||||
|
UseMI))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// %vgpr = COPY %sgpr0
|
// %vgpr = COPY %sgpr0
|
||||||
|
|
|
@ -6073,28 +6073,49 @@ MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool llvm::isEXECMaskConstantBetweenDefAndUses(unsigned VReg,
|
bool llvm::execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
|
||||||
const MachineRegisterInfo &MRI) {
|
unsigned VReg,
|
||||||
|
const MachineInstr &DefMI,
|
||||||
|
const MachineInstr *UseMI) {
|
||||||
assert(MRI.isSSA() && "Must be run on SSA");
|
assert(MRI.isSSA() && "Must be run on SSA");
|
||||||
|
assert(DefMI.definesRegister(VReg) && "wrong def instruction");
|
||||||
|
|
||||||
auto *TRI = MRI.getTargetRegisterInfo();
|
auto *TRI = MRI.getTargetRegisterInfo();
|
||||||
|
auto *DefBB = DefMI.getParent();
|
||||||
|
|
||||||
auto *DefI = MRI.getVRegDef(VReg);
|
if (UseMI) {
|
||||||
auto *BB = DefI->getParent();
|
// Don't bother searching between blocks, although it is possible this block
|
||||||
|
// doesn't modify exec.
|
||||||
|
if (UseMI->getParent() != DefBB)
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
int NumUse = 0;
|
||||||
|
const int MaxUseScan = 10;
|
||||||
|
|
||||||
DenseSet<MachineInstr*> Uses;
|
for (auto &UseInst : MRI.use_nodbg_instructions(VReg)) {
|
||||||
for (auto &Use : MRI.use_nodbg_operands(VReg)) {
|
if (UseInst.getParent() != DefBB)
|
||||||
auto *I = Use.getParent();
|
return true;
|
||||||
if (I->getParent() != BB)
|
|
||||||
return false;
|
if (NumUse++ > MaxUseScan)
|
||||||
Uses.insert(I);
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto E = BB->end();
|
const int MaxInstScan = 20;
|
||||||
for (auto I = std::next(DefI->getIterator()); I != E; ++I) {
|
int NumScan = 0;
|
||||||
Uses.erase(&*I);
|
|
||||||
// don't check the last use
|
// Stop scan at the use if known.
|
||||||
if (Uses.empty() || I->modifiesRegister(AMDGPU::EXEC, TRI))
|
auto E = UseMI ? UseMI->getIterator() : DefBB->end();
|
||||||
break;
|
for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
|
||||||
|
if (I->isDebugInstr())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (NumScan++ > MaxInstScan)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (I->modifiesRegister(AMDGPU::EXEC, TRI))
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
return Uses.empty();
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -977,11 +977,14 @@ TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
|
||||||
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
|
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
|
||||||
MachineRegisterInfo &MRI);
|
MachineRegisterInfo &MRI);
|
||||||
|
|
||||||
/// \brief Return true if EXEC mask isn't changed between the def and
|
/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
|
||||||
/// all uses of VReg. Currently if def and uses are in different BBs -
|
/// DefMI and uses. If \p UseMI is not specified, this checks all uses of \p
|
||||||
/// simply return false. Should be run on SSA.
|
/// VReg. Should be run on SSA. Currently does not attempt to track between
|
||||||
bool isEXECMaskConstantBetweenDefAndUses(unsigned VReg,
|
/// blocks.
|
||||||
const MachineRegisterInfo &MRI);
|
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
|
||||||
|
unsigned VReg,
|
||||||
|
const MachineInstr &DefMI,
|
||||||
|
const MachineInstr *UseMI = nullptr);
|
||||||
|
|
||||||
namespace AMDGPU {
|
namespace AMDGPU {
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue