diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 430cf480eb19..fb1d71779cb3 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -331,7 +331,7 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
   auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
   assert(DstOpnd && DstOpnd->isReg());
   auto DPPMovReg = DstOpnd->getReg();
-  if (!isEXECMaskConstantBetweenDefAndUses(DPPMovReg, *MRI)) {
+  if (execMayBeModifiedBeforeUse(*MRI, DPPMovReg, MovMI)) {
     LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
                          " for all uses\n");
     return false;
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 00698e2dd4e1..4a1fc1332c36 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -516,8 +516,10 @@ void SIFoldOperands::foldOperand(
       // =>
       // %sgpr = S_MOV_B32 imm
       if (FoldingImm) {
-        if (!isEXECMaskConstantBetweenDefAndUses(
-                UseMI->getOperand(UseOpIdx).getReg(), *MRI))
+        if (execMayBeModifiedBeforeUse(*MRI,
+                                       UseMI->getOperand(UseOpIdx).getReg(),
+                                       *OpToFold.getParent(),
+                                       UseMI))
           return;
 
         UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
@@ -527,8 +529,10 @@ void SIFoldOperands::foldOperand(
       }
 
       if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
-        if (!isEXECMaskConstantBetweenDefAndUses(
-                UseMI->getOperand(UseOpIdx).getReg(), *MRI))
+        if (execMayBeModifiedBeforeUse(*MRI,
+                                       UseMI->getOperand(UseOpIdx).getReg(),
+                                       *OpToFold.getParent(),
+                                       UseMI))
           return;
 
         // %vgpr = COPY %sgpr0
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 58f4d95c97a2..bb0076c6db38 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6073,28 +6073,51 @@ MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
   return nullptr;
 }
 
-bool llvm::isEXECMaskConstantBetweenDefAndUses(unsigned VReg,
-                                               const MachineRegisterInfo &MRI) {
+bool llvm::execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
+                                      unsigned VReg,
+                                      const MachineInstr &DefMI,
+                                      const MachineInstr *UseMI) {
   assert(MRI.isSSA() && "Must be run on SSA");
+  assert(DefMI.definesRegister(VReg) && "wrong def instruction");
+
   auto *TRI = MRI.getTargetRegisterInfo();
+  auto *DefBB = DefMI.getParent();
 
-  auto *DefI = MRI.getVRegDef(VReg);
-  auto *BB = DefI->getParent();
+  if (UseMI) {
+    // Don't bother searching between blocks, although it is possible this block
+    // doesn't modify exec.
+    if (UseMI->getParent() != DefBB)
+      return true;
+  } else {
+    // Conservatively give up once more than MaxUseScan uses are seen.
+    int NumUse = 0;
+    const int MaxUseScan = 10;
 
-  DenseSet<MachineInstr*> Uses;
-  for (auto &Use : MRI.use_nodbg_operands(VReg)) {
-    auto *I = Use.getParent();
-    if (I->getParent() != BB)
-      return false;
-    Uses.insert(I);
+    for (auto &UseInst : MRI.use_nodbg_instructions(VReg)) {
+      if (UseInst.getParent() != DefBB)
+        return true;
+
+      if (++NumUse > MaxUseScan)
+        return true;
+    }
   }
 
-  auto E = BB->end();
-  for (auto I = std::next(DefI->getIterator()); I != E; ++I) {
-    Uses.erase(&*I);
-    // don't check the last use
-    if (Uses.empty() || I->modifiesRegister(AMDGPU::EXEC, TRI))
-      break;
+  // Likewise cap the number of non-debug instructions scanned.
+  const int MaxInstScan = 20;
+  int NumScan = 0;
+
+  // Stop scan at the use if known.
+  auto E = UseMI ? UseMI->getIterator() : DefBB->end();
+  for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
+    if (I->isDebugInstr())
+      continue;
+
+    if (++NumScan > MaxInstScan)
+      return true;
+
+    if (I->modifiesRegister(AMDGPU::EXEC, TRI))
+      return true;
   }
-  return Uses.empty();
+
+  return false;
 }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 2d31df5298d6..99e8da670948 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -977,11 +977,14 @@ TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
 MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
                                MachineRegisterInfo &MRI);
 
-/// \brief Return true if EXEC mask isnt' changed between the def and
-/// all uses of VReg. Currently if def and uses are in different BBs -
-/// simply return false. Should be run on SSA.
-bool isEXECMaskConstantBetweenDefAndUses(unsigned VReg,
-                                         const MachineRegisterInfo &MRI);
+/// Return true if EXEC may be modified between the def of \p VReg at \p DefMI
+/// and its uses, false if it is known to be unchanged. If \p UseMI is null,
+/// all uses of \p VReg are checked. Must be run on SSA. Conservatively
+/// returns true across blocks or when the bounded scan limits are exceeded.
+bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
+                                unsigned VReg,
+                                const MachineInstr &DefMI,
+                                const MachineInstr *UseMI = nullptr);
 
 namespace AMDGPU {
 