forked from OSchip/llvm-project
[PGOMemOPSize] Preserve the DominatorTree
Summary: PGOMemOPSize only modifies the CFG in a couple of places; thus we can preserve the DominatorTree with little effort. When optimizing SQLite with -O3, this patch decreases the number of nodes traversed by DFS by 3.8% and the number of times DominatorTreeBase::recalculation is called by 5.7%. Reviewers: kuhar, davide, dmgreen Reviewed By: dmgreen Subscribers: mzolotukhin, vsk, llvm-commits Differential Revision: https://reviews.llvm.org/D48914 llvm-svn: 336522
This commit is contained in:
parent
2dc3dac90c
commit
9e1e0c7b2a
|
@ -25,6 +25,8 @@
|
|||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
#include "llvm/IR/DomTreeUpdater.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/InstVisitor.h"
|
||||
|
@ -112,6 +114,7 @@ private:
|
|||
AU.addRequired<BlockFrequencyInfoWrapperPass>();
|
||||
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
|
||||
AU.addPreserved<GlobalsAAWrapperPass>();
|
||||
AU.addPreserved<DominatorTreeWrapperPass>();
|
||||
}
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
@ -133,8 +136,8 @@ namespace {
|
|||
class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
|
||||
public:
|
||||
MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI,
|
||||
OptimizationRemarkEmitter &ORE)
|
||||
: Func(Func), BFI(BFI), ORE(ORE), Changed(false) {
|
||||
OptimizationRemarkEmitter &ORE, DominatorTree *DT)
|
||||
: Func(Func), BFI(BFI), ORE(ORE), DT(DT), Changed(false) {
|
||||
ValueDataArray =
|
||||
llvm::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2);
|
||||
// Get the MemOPSize range information from option MemOPSizeRange,
|
||||
|
@ -170,6 +173,7 @@ private:
|
|||
Function &Func;
|
||||
BlockFrequencyInfo &BFI;
|
||||
OptimizationRemarkEmitter &ORE;
|
||||
DominatorTree *DT;
|
||||
bool Changed;
|
||||
std::vector<MemIntrinsic *> WorkList;
|
||||
// Start of the precise range.
|
||||
|
@ -336,15 +340,16 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
|
|||
LLVM_DEBUG(dbgs() << *BB << "\n");
|
||||
auto OrigBBFreq = BFI.getBlockFreq(BB);
|
||||
|
||||
BasicBlock *DefaultBB = SplitBlock(BB, MI);
|
||||
BasicBlock *DefaultBB = SplitBlock(BB, MI, DT);
|
||||
BasicBlock::iterator It(*MI);
|
||||
++It;
|
||||
assert(It != DefaultBB->end());
|
||||
BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It));
|
||||
BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT);
|
||||
MergeBB->setName("MemOP.Merge");
|
||||
BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
|
||||
DefaultBB->setName("MemOP.Default");
|
||||
|
||||
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
|
||||
auto &Ctx = Func.getContext();
|
||||
IRBuilder<> IRB(BB);
|
||||
BB->getTerminator()->eraseFromParent();
|
||||
|
@ -361,6 +366,10 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
|
|||
|
||||
LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n");
|
||||
|
||||
std::vector<DominatorTree::UpdateType> Updates;
|
||||
if (DT)
|
||||
Updates.reserve(2 * SizeIds.size());
|
||||
|
||||
for (uint64_t SizeId : SizeIds) {
|
||||
BasicBlock *CaseBB = BasicBlock::Create(
|
||||
Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
|
||||
|
@ -375,8 +384,15 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
|
|||
IRBuilder<> IRBCase(CaseBB);
|
||||
IRBCase.CreateBr(MergeBB);
|
||||
SI->addCase(CaseSizeId, CaseBB);
|
||||
if (DT) {
|
||||
Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB});
|
||||
Updates.push_back({DominatorTree::Insert, BB, CaseBB});
|
||||
}
|
||||
LLVM_DEBUG(dbgs() << *CaseBB << "\n");
|
||||
}
|
||||
DTU.applyUpdates(Updates);
|
||||
Updates.clear();
|
||||
|
||||
setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount);
|
||||
|
||||
LLVM_DEBUG(dbgs() << *BB << "\n");
|
||||
|
@ -397,13 +413,14 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
|
|||
} // namespace
|
||||
|
||||
static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI,
|
||||
OptimizationRemarkEmitter &ORE) {
|
||||
OptimizationRemarkEmitter &ORE,
|
||||
DominatorTree *DT) {
|
||||
if (DisableMemOPOPT)
|
||||
return false;
|
||||
|
||||
if (F.hasFnAttribute(Attribute::OptimizeForSize))
|
||||
return false;
|
||||
MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE);
|
||||
MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT);
|
||||
MemOPSizeOpt.perform();
|
||||
return MemOPSizeOpt.isChanged();
|
||||
}
|
||||
|
@ -412,7 +429,9 @@ bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) {
|
|||
BlockFrequencyInfo &BFI =
|
||||
getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
|
||||
auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
|
||||
return PGOMemOPSizeOptImpl(F, BFI, ORE);
|
||||
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
|
||||
DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
|
||||
return PGOMemOPSizeOptImpl(F, BFI, ORE, DT);
|
||||
}
|
||||
|
||||
namespace llvm {
|
||||
|
@ -422,11 +441,13 @@ PreservedAnalyses PGOMemOPSizeOpt::run(Function &F,
|
|||
FunctionAnalysisManager &FAM) {
|
||||
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
|
||||
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
|
||||
bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE);
|
||||
auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
|
||||
bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT);
|
||||
if (!Changed)
|
||||
return PreservedAnalyses::all();
|
||||
auto PA = PreservedAnalyses();
|
||||
PA.preserve<GlobalsAA>();
|
||||
PA.preserve<DominatorTreeAnalysis>();
|
||||
return PA;
|
||||
}
|
||||
} // namespace llvm
|
||||
|
|
|
@ -80,7 +80,6 @@
|
|||
; CHECK-NEXT: Lazy Block Frequency Analysis
|
||||
; CHECK-NEXT: Optimization Remark Emitter
|
||||
; CHECK-NEXT: PGOMemOPSize
|
||||
; CHECK-NEXT: Dominator Tree Construction
|
||||
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
|
||||
; CHECK-NEXT: Function Alias Analysis Results
|
||||
; CHECK-NEXT: Natural Loop Information
|
||||
|
|
|
@ -84,7 +84,6 @@
|
|||
; CHECK-NEXT: Lazy Block Frequency Analysis
|
||||
; CHECK-NEXT: Optimization Remark Emitter
|
||||
; CHECK-NEXT: PGOMemOPSize
|
||||
; CHECK-NEXT: Dominator Tree Construction
|
||||
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
|
||||
; CHECK-NEXT: Function Alias Analysis Results
|
||||
; CHECK-NEXT: Natural Loop Information
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: opt < %s -pgo-memop-opt -S | FileCheck %s
|
||||
; RUN: opt < %s -pgo-memop-opt -verify-dom-info -S | FileCheck %s
|
||||
|
||||
define i32 @test(i8* %a, i8* %b) !prof !1 {
|
||||
; CHECK_LABEL: test
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
; RUN: opt < %s -pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT
|
||||
; RUN: opt < %s -passes=pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT
|
||||
; RUN: opt < %s -pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT
|
||||
; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT
|
||||
; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT
|
||||
; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT
|
||||
; RUN: FileCheck %s -input-file=%t.opt.yaml --check-prefix=YAML
|
||||
; RUN: opt < %s -passes=pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT
|
||||
; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT
|
||||
; RUN: FileCheck %s -input-file=%t.opt.yaml --check-prefix=YAML
|
||||
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
; Test to ensure the pgo memop optimization pass doesn't try to scale
|
||||
; up a value profile with a 0 count, which would lead to divide by 0.
|
||||
; RUN: opt < %s -passes=pgo-memop-opt -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT
|
||||
; RUN: opt < %s -pgo-memop-opt -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT
|
||||
; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT
|
||||
; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
|
Loading…
Reference in New Issue