[CGP] Lazily compute domtree only when needed during address matching

This is a compile time optimization for d9e93e8e5.  As pointed out in post dommit review on the original review (D96399), there was a moderately large compile time regression with this patch and the eager computation of domtree on matcher construction is the first obvious candidate for why.
This commit is contained in:
Philip Reames 2021-03-04 09:31:02 -08:00
parent 4e393350c5
commit e0cfd45171
1 changed files with 20 additions and 12 deletions

View File

@ -3083,7 +3083,7 @@ class AddressingModeMatcher {
const TargetRegisterInfo &TRI;
const DataLayout &DL;
const LoopInfo &LI;
const DominatorTree &DT;
const std::function<const DominatorTree &()> getDTFn;
/// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
/// the memory instruction that we're computing this address for.
@ -3120,14 +3120,15 @@ class AddressingModeMatcher {
AddressingModeMatcher(
SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
const TargetRegisterInfo &TRI, const LoopInfo &LI,
const DominatorTree &DT, Type *AT, unsigned AS, Instruction *MI,
ExtAddrMode &AM, const SetOfInstrs &InsertedInsts,
InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
const std::function<const DominatorTree &()> getDTFn,
Type *AT, unsigned AS, Instruction *MI, ExtAddrMode &AM,
const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT,
std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
: AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
DL(MI->getModule()->getDataLayout()), LI(LI), DT(DT), AccessTy(AT),
AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
DL(MI->getModule()->getDataLayout()), LI(LI), getDTFn(getDTFn),
AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
IgnoreProfitability = false;
@ -3144,7 +3145,8 @@ public:
static ExtAddrMode
Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
SmallVectorImpl<Instruction *> &AddrModeInsts,
const TargetLowering &TLI, const LoopInfo &LI, const DominatorTree &DT,
const TargetLowering &TLI, const LoopInfo &LI,
const std::function<const DominatorTree &()> getDTFn,
const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
@ -3152,7 +3154,7 @@ public:
ExtAddrMode Result;
bool Success = AddressingModeMatcher(
AddrModeInsts, TLI, TRI, LI, DT, AccessTy, AS, MemoryInst, Result,
AddrModeInsts, TLI, TRI, LI, getDTFn, AccessTy, AS, MemoryInst, Result,
InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
BFI).matchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
@ -3910,7 +3912,7 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
Instruction *IVInc = IVStep->first;
APInt Step = IVStep->second;
APInt Offset = Step * AddrMode.Scale;
if (Offset.isSignedIntN(64) && DT.dominates(IVInc, MemoryInst)) {
if (Offset.isSignedIntN(64) && getDTFn().dominates(IVInc, MemoryInst)) {
TestAddrMode.InBounds = false;
TestAddrMode.ScaledReg = IVInc;
TestAddrMode.BaseOffs -= Offset.getLimitedValue();
@ -5016,7 +5018,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
0);
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, DT,
AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
AddressAccessTy, AS, MemoryInst, Result,
InsertedInsts, PromotedInsts, TPT,
LargeOffsetGEP, OptSize, PSI, BFI);
@ -5122,9 +5124,15 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
AddrModeInsts.clear();
std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
0);
Function *F = MemoryInst->getParent()->getParent();
// Defer the query (and possible computation of) the dom tree to point of
// actual use. It's expected that most address matches don't actually need
// the domtree.
auto getDTFn = [MemoryInst, this]() -> const DominatorTree & {
Function *F = MemoryInst->getParent()->getParent();
return this->getDT(*F);
};
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDT(*F),
V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
*TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
BFI.get());