AMDGPU: Change API for checking for exec modification

Invert the name and return value to better reflect the imprecise
nature.

Force passing in the DefMI, since it's known in the 2 users and could
possibly fail for an arbitrary vreg.

Allow specifying a specific user instruction. Scan through use
instructions, instead of use operands. Add scan thresholds instead of
searching infinitely.

Stop using a set to track seen uses. I didn't understand this usage,
or why it would not check the last use. I don't think the use list has
any particular order.

llvm-svn: 363675
This commit is contained in:
Matt Arsenault 2019-06-18 12:48:36 +00:00
parent c99d9aee00
commit f39f3bd056
4 changed files with 55 additions and 27 deletions

View File

@ -331,7 +331,7 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
assert(DstOpnd && DstOpnd->isReg());
auto DPPMovReg = DstOpnd->getReg();
if (!isEXECMaskConstantBetweenDefAndUses(DPPMovReg, *MRI)) {
if (execMayBeModifiedBeforeUse(*MRI, DPPMovReg, MovMI)) {
LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
" for all uses\n");
return false;

View File

@ -516,8 +516,10 @@ void SIFoldOperands::foldOperand(
// =>
// %sgpr = S_MOV_B32 imm
if (FoldingImm) {
if (!isEXECMaskConstantBetweenDefAndUses(
UseMI->getOperand(UseOpIdx).getReg(), *MRI))
if (execMayBeModifiedBeforeUse(*MRI,
UseMI->getOperand(UseOpIdx).getReg(),
*OpToFold.getParent(),
UseMI))
return;
UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
@ -527,8 +529,10 @@ void SIFoldOperands::foldOperand(
}
if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
if (!isEXECMaskConstantBetweenDefAndUses(
UseMI->getOperand(UseOpIdx).getReg(), *MRI))
if (execMayBeModifiedBeforeUse(*MRI,
UseMI->getOperand(UseOpIdx).getReg(),
*OpToFold.getParent(),
UseMI))
return;
// %vgpr = COPY %sgpr0

View File

@ -6073,28 +6073,49 @@ MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
return nullptr;
}
bool llvm::isEXECMaskConstantBetweenDefAndUses(unsigned VReg,
const MachineRegisterInfo &MRI) {
bool llvm::execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
unsigned VReg,
const MachineInstr &DefMI,
const MachineInstr *UseMI) {
assert(MRI.isSSA() && "Must be run on SSA");
assert(DefMI.definesRegister(VReg) && "wrong def instruction");
auto *TRI = MRI.getTargetRegisterInfo();
auto *DefBB = DefMI.getParent();
auto *DefI = MRI.getVRegDef(VReg);
auto *BB = DefI->getParent();
if (UseMI) {
// Don't bother searching between blocks, although it is possible this block
// doesn't modify exec.
if (UseMI->getParent() != DefBB)
return true;
} else {
int NumUse = 0;
const int MaxUseScan = 10;
DenseSet<MachineInstr*> Uses;
for (auto &Use : MRI.use_nodbg_operands(VReg)) {
auto *I = Use.getParent();
if (I->getParent() != BB)
return false;
Uses.insert(I);
for (auto &UseInst : MRI.use_nodbg_instructions(VReg)) {
if (UseInst.getParent() != DefBB)
return true;
if (NumUse++ > MaxUseScan)
return true;
}
}
auto E = BB->end();
for (auto I = std::next(DefI->getIterator()); I != E; ++I) {
Uses.erase(&*I);
// don't check the last use
if (Uses.empty() || I->modifiesRegister(AMDGPU::EXEC, TRI))
break;
const int MaxInstScan = 20;
int NumScan = 0;
// Stop scan at the use if known.
auto E = UseMI ? UseMI->getIterator() : DefBB->end();
for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
if (I->isDebugInstr())
continue;
if (NumScan++ > MaxInstScan)
return true;
if (I->modifiesRegister(AMDGPU::EXEC, TRI))
return true;
}
return Uses.empty();
return false;
}

View File

@ -977,11 +977,14 @@ TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
MachineRegisterInfo &MRI);
/// \brief Return true if EXEC mask isn't changed between the def and
/// all uses of VReg. Currently if def and uses are in different BBs -
/// simply return false. Should be run on SSA.
bool isEXECMaskConstantBetweenDefAndUses(unsigned VReg,
const MachineRegisterInfo &MRI);
/// \brief Return false if EXEC is known not to be modified between the def of
/// \p VReg at \p DefMI and its uses. A true result is conservative: it is also
/// returned when a use is in a different block than \p DefMI or when a scan
/// threshold is exceeded. If \p UseMI is not specified, this checks all uses
/// of \p VReg. Should be run on SSA.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
unsigned VReg,
const MachineInstr &DefMI,
const MachineInstr *UseMI = nullptr);
namespace AMDGPU {