[TailCallElim] Preserve DT and PDT

Summary:
Previously, in the NewPM pipeline, TailCallElim recalculated the DomTree whenever it modified any instruction in the Function.
For example,
```
CallInst *CI = dyn_cast<CallInst>(&I);
...
CI->setTailCall();
Modified = true;
...
if (!Modified || ...)
  return PreservedAnalyses::all();
```
After applying this patch, the DomTree is recalculated only when needed (at the cost of one extra insertEdge() call and one extra deleteEdge() call).

When optimizing SQLite with the `-passes="default<O3>"` pipeline of the NewPM, the number of DomTree recalculations decreases by 6.2% and the number of nodes visited by DFS decreases by 2.9%. The time spent in DomTree construction decreases by approximately 1%~2.5% after applying the patch.
 
Statistics:
```
Before the patch:
 23010 dom-tree-stats               - Number of DomTree recalculations
489264 dom-tree-stats               - Number of nodes visited by DFS -- DomTree
After the patch:
 21581 dom-tree-stats               - Number of DomTree recalculations
475088 dom-tree-stats               - Number of nodes visited by DFS -- DomTree
```

Reviewers: kuhar, dmgreen, brzycki, grosser, davide

Reviewed By: kuhar, brzycki

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D49982

llvm-svn: 338954
This commit is contained in:
Chijun Sima 2018-08-04 08:13:47 +00:00
parent 03b4d0c19f
commit 8b5de48d62
16 changed files with 71 additions and 42 deletions

View File

@ -229,7 +229,8 @@ void SplitLandingPadPredecessors(BasicBlock *OrigBB,
/// value defined by a PHI, propagate the right value into the return. It /// value defined by a PHI, propagate the right value into the return. It
/// returns the new return instruction in the predecessor. /// returns the new return instruction in the predecessor.
ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
BasicBlock *Pred); BasicBlock *Pred,
DomTreeUpdater *DTU = nullptr);
/// Split the containing block at the specified instruction - everything before /// Split the containing block at the specified instruction - everything before
/// SplitBefore stays in the old basic block, and the rest of the instructions /// SplitBefore stays in the old basic block, and the rest of the instructions

View File

@ -61,6 +61,7 @@
#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CFG.h" #include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h" #include "llvm/IR/CallSite.h"
@ -68,6 +69,8 @@
#include "llvm/IR/DataLayout.h" #include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h" #include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h" #include "llvm/IR/Instructions.h"
@ -488,12 +491,10 @@ static CallInst *findTRECandidate(Instruction *TI,
return CI; return CI;
} }
static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, static bool eliminateRecursiveTailCall(
BasicBlock *&OldEntry, CallInst *CI, ReturnInst *Ret, BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail, bool &TailCallsAreMarkedTail, SmallVectorImpl<PHINode *> &ArgumentPHIs,
SmallVectorImpl<PHINode *> &ArgumentPHIs, AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) {
AliasAnalysis *AA,
OptimizationRemarkEmitter *ORE) {
// If we are introducing accumulator recursion to eliminate operations after // If we are introducing accumulator recursion to eliminate operations after
// the call instruction that are both associative and commutative, the initial // the call instruction that are both associative and commutative, the initial
// value for the accumulator is placed in this variable. If this value is set // value for the accumulator is placed in this variable. If this value is set
@ -593,6 +594,10 @@ static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
PN->addIncoming(&*I, NewEntry); PN->addIncoming(&*I, NewEntry);
ArgumentPHIs.push_back(PN); ArgumentPHIs.push_back(PN);
} }
// The entry block was changed from OldEntry to NewEntry.
// The forward DominatorTree needs to be recalculated when the EntryBB is
// changed. In this corner-case we recalculate the entire tree.
DTU.recalculate(*NewEntry->getParent());
} }
// If this function has self recursive calls in the tail position where some // If this function has self recursive calls in the tail position where some
@ -668,6 +673,7 @@ static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
BB->getInstList().erase(Ret); // Remove return. BB->getInstList().erase(Ret); // Remove return.
BB->getInstList().erase(CI); // Remove call. BB->getInstList().erase(CI); // Remove call.
DTU.insertEdge(BB, OldEntry);
++NumEliminated; ++NumEliminated;
return true; return true;
} }
@ -676,7 +682,7 @@ static bool foldReturnAndProcessPred(
BasicBlock *BB, ReturnInst *Ret, BasicBlock *&OldEntry, BasicBlock *BB, ReturnInst *Ret, BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail, SmallVectorImpl<PHINode *> &ArgumentPHIs, bool &TailCallsAreMarkedTail, SmallVectorImpl<PHINode *> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail, const TargetTransformInfo *TTI, bool CannotTailCallElimCallsMarkedTail, const TargetTransformInfo *TTI,
AliasAnalysis *AA, OptimizationRemarkEmitter *ORE) { AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) {
bool Change = false; bool Change = false;
// Make sure this block is a trivial return block. // Make sure this block is a trivial return block.
@ -702,17 +708,17 @@ static bool foldReturnAndProcessPred(
if (CallInst *CI = findTRECandidate(BI, CannotTailCallElimCallsMarkedTail, TTI)){ if (CallInst *CI = findTRECandidate(BI, CannotTailCallElimCallsMarkedTail, TTI)){
LLVM_DEBUG(dbgs() << "FOLDING: " << *BB LLVM_DEBUG(dbgs() << "FOLDING: " << *BB
<< "INTO UNCOND BRANCH PRED: " << *Pred); << "INTO UNCOND BRANCH PRED: " << *Pred);
ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred); ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred, &DTU);
// Cleanup: if all predecessors of BB have been eliminated by // Cleanup: if all predecessors of BB have been eliminated by
// FoldReturnIntoUncondBranch, delete it. It is important to empty it, // FoldReturnIntoUncondBranch, delete it. It is important to empty it,
// because the ret instruction in there is still using a value which // because the ret instruction in there is still using a value which
// eliminateRecursiveTailCall will attempt to remove. // eliminateRecursiveTailCall will attempt to remove.
if (!BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) if (!BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
BB->eraseFromParent(); DTU.deleteBB(BB);
eliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail, eliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail,
ArgumentPHIs, AA, ORE); ArgumentPHIs, AA, ORE, DTU);
++NumRetDuped; ++NumRetDuped;
Change = true; Change = true;
} }
@ -721,24 +727,23 @@ static bool foldReturnAndProcessPred(
return Change; return Change;
} }
static bool processReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, static bool processReturningBlock(
bool &TailCallsAreMarkedTail, ReturnInst *Ret, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail,
SmallVectorImpl<PHINode *> &ArgumentPHIs, SmallVectorImpl<PHINode *> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail, bool CannotTailCallElimCallsMarkedTail, const TargetTransformInfo *TTI,
const TargetTransformInfo *TTI, AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) {
AliasAnalysis *AA,
OptimizationRemarkEmitter *ORE) {
CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail, TTI); CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail, TTI);
if (!CI) if (!CI)
return false; return false;
return eliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail, return eliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail,
ArgumentPHIs, AA, ORE); ArgumentPHIs, AA, ORE, DTU);
} }
static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI, static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI,
AliasAnalysis *AA, AliasAnalysis *AA,
OptimizationRemarkEmitter *ORE) { OptimizationRemarkEmitter *ORE,
DomTreeUpdater &DTU) {
if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true") if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
return false; return false;
@ -773,11 +778,11 @@ static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI,
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) { if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
bool Change = processReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, bool Change = processReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
ArgumentPHIs, !CanTRETailMarkedCall, ArgumentPHIs, !CanTRETailMarkedCall,
TTI, AA, ORE); TTI, AA, ORE, DTU);
if (!Change && BB->getFirstNonPHIOrDbg() == Ret) if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
Change = foldReturnAndProcessPred(BB, Ret, OldEntry, Change = foldReturnAndProcessPred(
TailCallsAreMarkedTail, ArgumentPHIs, BB, Ret, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs,
!CanTRETailMarkedCall, TTI, AA, ORE); !CanTRETailMarkedCall, TTI, AA, ORE, DTU);
MadeChange |= Change; MadeChange |= Change;
} }
} }
@ -810,16 +815,27 @@ struct TailCallElim : public FunctionPass {
AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<PostDominatorTreeWrapperPass>();
} }
bool runOnFunction(Function &F) override { bool runOnFunction(Function &F) override {
if (skipFunction(F)) if (skipFunction(F))
return false; return false;
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
// There is no noticeable performance difference here between Lazy and Eager
// UpdateStrategy based on some test results. It is feasible to switch the
// UpdateStrategy to Lazy if we find it profitable later.
DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
return eliminateTailRecursion( return eliminateTailRecursion(
F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F), F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
&getAnalysis<AAResultsWrapperPass>().getAAResults(), &getAnalysis<AAResultsWrapperPass>().getAAResults(),
&getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE()); &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(), DTU);
} }
}; };
} }
@ -843,12 +859,19 @@ PreservedAnalyses TailCallElimPass::run(Function &F,
TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F); TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
AliasAnalysis &AA = AM.getResult<AAManager>(F); AliasAnalysis &AA = AM.getResult<AAManager>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
bool Changed = eliminateTailRecursion(F, &TTI, &AA, &ORE); auto *PDT = AM.getCachedResult<PostDominatorTreeAnalysis>(F);
// There is no noticeable performance difference here between Lazy and Eager
// UpdateStrategy based on some test results. It is feasible to switch the
// UpdateStrategy to Lazy if we find it profitable later.
DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
bool Changed = eliminateTailRecursion(F, &TTI, &AA, &ORE, DTU);
if (!Changed) if (!Changed)
return PreservedAnalyses::all(); return PreservedAnalyses::all();
PreservedAnalyses PA; PreservedAnalyses PA;
PA.preserve<GlobalsAA>(); PA.preserve<GlobalsAA>();
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<PostDominatorTreeAnalysis>();
return PA; return PA;
} }

View File

@ -646,7 +646,8 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
} }
ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
BasicBlock *Pred) { BasicBlock *Pred,
DomTreeUpdater *DTU) {
Instruction *UncondBranch = Pred->getTerminator(); Instruction *UncondBranch = Pred->getTerminator();
// Clone the return and add it to the end of the predecessor. // Clone the return and add it to the end of the predecessor.
Instruction *NewRet = RI->clone(); Instruction *NewRet = RI->clone();
@ -680,6 +681,10 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
// longer branch to them. // longer branch to them.
BB->removePredecessor(Pred); BB->removePredecessor(Pred);
UncondBranch->eraseFromParent(); UncondBranch->eraseFromParent();
if (DTU)
DTU->deleteEdge(Pred, BB);
return cast<ReturnInst>(NewRet); return cast<ReturnInst>(NewRet);
} }

View File

@ -1,4 +1,4 @@
; RUN: opt < %s -tailcallelim -S | FileCheck %s ; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
; PR7328 ; PR7328
; PR7506 ; PR7506
define i32 @foo(i32 %x) { define i32 @foo(i32 %x) {

View File

@ -1,4 +1,4 @@
; RUN: opt -tailcallelim -S < %s 2>&1 | FileCheck %s ; RUN: opt -tailcallelim -verify-dom-info -S < %s 2>&1 | FileCheck %s
; CHECK: add nsw i32 ; CHECK: add nsw i32
; CHECK-NEXT: br label ; CHECK-NEXT: br label

View File

@ -1,5 +1,5 @@
; RUN: opt < %s -tailcallelim -S | FileCheck %s ; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
; RUN: opt < %s -passes=tailcallelim -S | FileCheck %s ; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s
define i32 @test1_factorial(i32 %x) { define i32 @test1_factorial(i32 %x) {
entry: entry:

View File

@ -1,6 +1,6 @@
; REQUIRES: asserts ; REQUIRES: asserts
; This function contains two tail calls, which should be eliminated ; This function contains two tail calls, which should be eliminated
; RUN: opt < %s -tailcallelim -stats -disable-output 2>&1 | grep "2 tailcallelim" ; RUN: opt < %s -tailcallelim -verify-dom-info -stats -disable-output 2>&1 | grep "2 tailcallelim"
define i32 @Ack(i32 %M.1, i32 %N.1) { define i32 @Ack(i32 %M.1, i32 %N.1) {
entry: entry:

View File

@ -1,4 +1,4 @@
; RUN: opt < %s -tailcallelim -S | FileCheck %s ; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
declare void @noarg() declare void @noarg()
declare void @use(i32*) declare void @use(i32*)

View File

@ -1,4 +1,4 @@
; RUN: opt < %s -tailcallelim -S | FileCheck %s ; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
define i32 @f_1(i32 %x) { define i32 @f_1(i32 %x) {
; CHECK-LABEL: @f_1( ; CHECK-LABEL: @f_1(

View File

@ -1,4 +1,4 @@
; RUN: opt < %s -tailcallelim -S | grep call | count 4 ; RUN: opt < %s -tailcallelim -verify-dom-info -S | grep call | count 4
; PR4323 ; PR4323
; Several cases where tail call elimination should not move the load above the ; Several cases where tail call elimination should not move the load above the

View File

@ -1,6 +1,6 @@
; REQUIRES: asserts ; REQUIRES: asserts
; Duplicate the return into if.end to enable TCE. ; Duplicate the return into if.end to enable TCE.
; RUN: opt -tailcallelim -stats -disable-output < %s 2>&1 | FileCheck %s ; RUN: opt -tailcallelim -verify-dom-info -stats -disable-output < %s 2>&1 | FileCheck %s
; CHECK: Number of return duplicated ; CHECK: Number of return duplicated

View File

@ -1,4 +1,4 @@
; RUN: opt < %s -tailcallelim -S | FileCheck %s ; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
; Don't turn this into an infinite loop, this is probably the implementation ; Don't turn this into an infinite loop, this is probably the implementation
; of fabs and we expect the codegen to lower fabs. ; of fabs and we expect the codegen to lower fabs.

View File

@ -1,4 +1,4 @@
; RUN: opt < %s -tailcallelim -S | FileCheck %s ; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
; CHECK: tail call void @callee0() ; CHECK: tail call void @callee0()
; CHECK: notail call void @callee1() ; CHECK: notail call void @callee1()

View File

@ -1,4 +1,4 @@
; RUN: opt %s -tailcallelim -pass-remarks=tailcallelim -o /dev/null 2>&1 | FileCheck %s ; RUN: opt %s -tailcallelim -verify-dom-info -pass-remarks=tailcallelim -o /dev/null 2>&1 | FileCheck %s
; RUN: opt %s -o /dev/null -passes='require<opt-remark-emit>,tailcallelim' -pass-remarks=tailcallelim 2>&1 | FileCheck %s ; RUN: opt %s -o /dev/null -passes='require<opt-remark-emit>,tailcallelim' -pass-remarks=tailcallelim 2>&1 | FileCheck %s
; CHECK: /home/davide/pat.c:2:20: transforming tail recursion into loop ; CHECK: /home/davide/pat.c:2:20: transforming tail recursion into loop

View File

@ -1,4 +1,4 @@
; RUN: opt < %s -tailcallelim -S | FileCheck %s ; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
; PR4323 ; PR4323
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

View File

@ -1,4 +1,4 @@
; RUN: opt < %s -tailcallelim -S | FileCheck %s ; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
; Test that we don't tail call in a functions that calls returns_twice ; Test that we don't tail call in a functions that calls returns_twice
; functions. ; functions.