[MergeICmps] Preserve the dominator tree.

Summary: In preparation for D60318 .

Reviewers: gchatelet, efriedma

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62068

llvm-svn: 361239
This commit is contained in:
Clement Courbet 2019-05-21 11:02:23 +00:00
parent 823458f9b8
commit a95d95d392
15 changed files with 77 additions and 37 deletions

View File

@ -41,9 +41,12 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Pass.h"
@ -396,9 +399,10 @@ class BCECmpChain {
void dump() const;
#endif // MERGEICMPS_DOT_ON
bool simplify(const TargetLibraryInfo *const TLI, AliasAnalysis *AA);
bool simplify(const TargetLibraryInfo *const TLI, AliasAnalysis *AA,
DomTreeUpdater &DTU);
private:
private:
static bool IsContiguous(const BCECmpBlock &First,
const BCECmpBlock &Second) {
return First.Lhs().BaseId == Second.Lhs().BaseId &&
@ -583,10 +587,11 @@ private:
// Merges the given contiguous comparison blocks into one memcmp block.
static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
BasicBlock *const InsertBefore,
BasicBlock *const NextCmpBlock,
PHINode &Phi,
const TargetLibraryInfo *const TLI,
AliasAnalysis *AA) {
AliasAnalysis *AA, DomTreeUpdater &DTU) {
assert(!Comparisons.empty() && "merging zero comparisons");
LLVMContext &Context = NextCmpBlock->getContext();
const BCECmpBlock &FirstCmp = Comparisons[0];
@ -594,13 +599,15 @@ static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
// Create a new cmp block before next cmp block.
BasicBlock *const BB =
BasicBlock::Create(Context, MergedBlockName(Comparisons).Name,
NextCmpBlock->getParent(), NextCmpBlock);
NextCmpBlock->getParent(), InsertBefore);
IRBuilder<> Builder(BB);
// Add the GEPs from the first BCECmpBlock.
Value *const Lhs = Builder.Insert(FirstCmp.Lhs().GEP->clone());
Value *const Rhs = Builder.Insert(FirstCmp.Rhs().GEP->clone());
Value *IsEqual = nullptr;
LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons -> "
<< BB->getName() << "\n");
if (Comparisons.size() == 1) {
LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n");
Value *const LhsLoad =
@ -610,8 +617,6 @@ static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
// There are no blocks to merge, just do the comparison.
IsEqual = Builder.CreateICmpEQ(LhsLoad, RhsLoad);
} else {
LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons\n");
// If there is one block that requires splitting, we do it now, i.e.
// just before we know we will collapse the chain. The instructions
// can be executed before any of the instructions in the chain.
@ -641,18 +646,21 @@ static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
// Add a branch to the next basic block in the chain.
if (NextCmpBlock == PhiBB) {
// Continue to phi, passing it the comparison result.
Builder.CreateBr(Phi.getParent());
Builder.CreateBr(PhiBB);
Phi.addIncoming(IsEqual, BB);
DTU.applyUpdates({{DominatorTree::Insert, BB, PhiBB}});
} else {
// Continue to next block if equal, exit to phi else.
Builder.CreateCondBr(IsEqual, NextCmpBlock, PhiBB);
Phi.addIncoming(ConstantInt::getFalse(Context), BB);
DTU.applyUpdates({{DominatorTree::Insert, BB, NextCmpBlock},
{DominatorTree::Insert, BB, PhiBB}});
}
return BB;
}
bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI,
AliasAnalysis *AA) {
AliasAnalysis *AA, DomTreeUpdater &DTU) {
assert(Comparisons_.size() >= 2 && "simplifying trivial BCECmpChain");
// First pass to check if there is at least one merge. If not, we don't do
// anything and we keep analysis passes intact.
@ -671,9 +679,11 @@ bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI,
// Effectively merge blocks. We go in the reverse direction from the phi block
// so that the next block is always available to branch to.
const auto mergeRange = [this, TLI, AA](int I, int Num, BasicBlock *Next) {
return mergeComparisons(makeArrayRef(Comparisons_).slice(I, Num), Next,
Phi_, TLI, AA);
const auto mergeRange = [this, TLI, AA, &DTU](int I, int Num,
BasicBlock *InsertBefore,
BasicBlock *Next) {
return mergeComparisons(makeArrayRef(Comparisons_).slice(I, Num),
InsertBefore, Next, Phi_, TLI, AA, DTU);
};
int NumMerged = 1;
BasicBlock *NextCmpBlock = Phi_.getParent();
@ -684,11 +694,14 @@ bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI,
<< "\n");
++NumMerged;
} else {
NextCmpBlock = mergeRange(I + 1, NumMerged, NextCmpBlock);
NextCmpBlock = mergeRange(I + 1, NumMerged, NextCmpBlock, NextCmpBlock);
NumMerged = 1;
}
}
NextCmpBlock = mergeRange(0, NumMerged, NextCmpBlock);
// Insert the entry block for the new chain before the old entry block.
// If the old entry block was the function entry, this ensures that the new
// entry can become the function entry.
NextCmpBlock = mergeRange(0, NumMerged, EntryBlock_, NextCmpBlock);
// Replace the original cmp chain with the new cmp chain by pointing all
// predecessors of EntryBlock_ to NextCmpBlock instead. This makes all cmp
@ -698,6 +711,20 @@ bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI,
LLVM_DEBUG(dbgs() << "Updating jump into old chain from " << Pred->getName()
<< "\n");
Pred->getTerminator()->replaceUsesOfWith(EntryBlock_, NextCmpBlock);
DTU.applyUpdates({{DominatorTree::Delete, Pred, EntryBlock_},
{DominatorTree::Insert, Pred, NextCmpBlock}});
}
// If the old cmp chain was the function entry, we need to update the function
// entry.
const bool ChainEntryIsFnEntry =
(EntryBlock_ == &EntryBlock_->getParent()->getEntryBlock());
if (ChainEntryIsFnEntry && DTU.hasDomTree()) {
LLVM_DEBUG(dbgs() << "Changing function entry from "
<< EntryBlock_->getName() << " to "
<< NextCmpBlock->getName() << "\n");
DTU.getDomTree().setNewRoot(NextCmpBlock);
DTU.applyUpdates({{DominatorTree::Delete, NextCmpBlock, EntryBlock_}});
}
EntryBlock_ = nullptr;
@ -707,7 +734,7 @@ bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI,
LLVM_DEBUG(dbgs() << "Deleting merged block " << Cmp.BB->getName() << "\n");
DeadBlocks.push_back(Cmp.BB);
}
DeleteDeadBlocks(DeadBlocks);
DeleteDeadBlocks(DeadBlocks, &DTU);
Comparisons_.clear();
return true;
@ -749,7 +776,7 @@ std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi,
}
bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI,
AliasAnalysis *AA) {
AliasAnalysis *AA, DomTreeUpdater &DTU) {
LLVM_DEBUG(dbgs() << "processPhi()\n");
if (Phi.getNumIncomingValues() <= 1) {
LLVM_DEBUG(dbgs() << "skip: only one incoming value in phi\n");
@ -814,7 +841,7 @@ bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI,
return false;
}
return CmpChain.simplify(TLI, AA);
return CmpChain.simplify(TLI, AA, DTU);
}
class MergeICmps : public FunctionPass {
@ -829,8 +856,14 @@ class MergeICmps : public FunctionPass {
if (skipFunction(F)) return false;
const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
// MergeICmps does not need the DominatorTree, but we update it if it's
// already available.
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DomTreeUpdater DTU(DTWP ? &DTWP->getDomTree() : nullptr,
/*PostDominatorTree*/ nullptr,
DomTreeUpdater::UpdateStrategy::Eager);
AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
auto PA = runImpl(F, &TLI, &TTI, AA);
auto PA = runImpl(F, &TLI, &TTI, AA, DTU);
return !PA.areAllPreserved();
}
@ -839,15 +872,18 @@ class MergeICmps : public FunctionPass {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
}
PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, AliasAnalysis *AA);
const TargetTransformInfo *TTI, AliasAnalysis *AA,
DomTreeUpdater &DTU);
};
PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
AliasAnalysis *AA) {
AliasAnalysis *AA, DomTreeUpdater &DTU) {
LLVM_DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n");
// We only try merging comparisons if the target wants to expand memcmp later.
@ -863,11 +899,15 @@ PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI,
for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) {
// A Phi operation is always first in a basic block.
if (auto *const Phi = dyn_cast<PHINode>(&*BBIt->begin()))
MadeChange |= processPhi(*Phi, TLI, AA);
MadeChange |= processPhi(*Phi, TLI, AA, DTU);
}
if (MadeChange) return PreservedAnalyses::none();
return PreservedAnalyses::all();
if (!MadeChange)
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserve<GlobalsAA>();
PA.preserve<DominatorTreeAnalysis>();
return PA;
}
} // namespace

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mergeicmps -S | FileCheck %s --check-prefix=X86
; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mergeicmps -verify-dom-info -S | FileCheck %s --check-prefix=X86
%S = type { i32, i32, i32, i32 }

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s
; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s
%S = type { i32, i32 }

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s
; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s
%S = type { i32, i32, i32, i32 }

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s
; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s
%S = type { i32, i32 }

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mergeicmps -S | FileCheck %s --check-prefix=X86
; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mergeicmps -verify-dom-info -S | FileCheck %s --check-prefix=X86
; 8-byte int and 8-byte pointer should merge into a 16-byte memcmp.
; X86: memcmp(i8* {{.*}}, i8* {{.*}}, i64 16)

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s
; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s
%S = type { i32, i32, i32 }

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86
; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86
%S = type { i32, i32, i32, i32 }

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86
; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S -disable-simplify-libcalls | FileCheck %s --check-prefix=X86-NOBUILTIN
; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86
; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S -disable-simplify-libcalls | FileCheck %s --check-prefix=X86-NOBUILTIN
%S = type { i32, i32 }

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86
; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86
source_filename = "qabstractitemmodeltester.cpp"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mergeicmps -S | FileCheck %s
; RUN: opt < %s -mergeicmps -verify-dom-info -S | FileCheck %s
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i386-pc-windows-msvc19.11.0"

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86
; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86
%S = type { i32, i32, i32, i32 }

View File

@ -1,6 +1,6 @@
; XFAIL: *
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s
; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s
; This is a more involved test: clang generates this weird pattern for
; tuple<uint8_t, uint8_t, uint8_t, uint8_t>. Right now we skip the entry block

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86
; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86
%S = type { i32, i32 }

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s
; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s
%S = type { i32, i32 }