Fine tuning of sample profile propagation algorithm.

Summary: The refined propagation algorithm is more accurate and robust.

Reviewers: davidxl, dnovillo

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D23224

llvm-svn: 278522
This commit is contained in:
Dehao Chen 2016-08-12 16:22:12 +00:00
parent 4a1e87d9c6
commit c0a1e432c7
11 changed files with 129 additions and 59 deletions

View File

@ -125,7 +125,7 @@ protected:
void propagateWeights(Function &F);
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
void buildEdges(Function &F);
bool propagateThroughEdges(Function &F);
bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
void computeDominanceAndLoopInfo(Function &F);
unsigned getOffset(unsigned L, unsigned H) const;
void clearFunctionData();
@ -460,11 +460,19 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
if (!FS)
return std::error_code();
// Ignore all dbg_value intrinsics.
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Inst);
if (II && II->getIntrinsicID() == Intrinsic::dbg_value)
// Ignore all intrinsics and branch instructions.
// Branch instruction usually contains debug info from sources outside of
// the residing basic block, thus we ignore them during annotation.
if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst))
return std::error_code();
// If a call instruction is inlined in profile, but not inlined here,
// it means that the inlined callsite has no sample, thus the call
// instruction should have 0 count.
const CallInst *CI = dyn_cast<CallInst>(&Inst);
if (CI && findCalleeFunctionSamples(*CI))
return 0;
const DILocation *DIL = DLoc;
unsigned Lineno = DLoc.getLine();
unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine();
@ -488,13 +496,6 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
<< Inst << " (line offset: " << Lineno - HeaderLineno << "."
<< DIL->getDiscriminator() << " - weight: " << R.get()
<< ")\n");
} else {
// If a call instruction is inlined in profile, but not inlined here,
// it means that the inlined callsite has no sample, thus the call
// instruction should have 0 count.
const CallInst *CI = dyn_cast<CallInst>(&Inst);
if (CI && findCalleeFunctionSamples(*CI))
R = 0;
}
return R;
}
@ -695,6 +696,10 @@ void SampleProfileLoader::findEquivalencesFor(
bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
EquivalenceClass[BB2] = EC;
// If BB2 is visited, then the entire EC should be marked as visited.
if (VisitedBlocks.count(BB2)) {
VisitedBlocks.insert(EC);
}
// If BB2 is heavier than BB1, make BB2 have the same weight
// as BB1.
@ -707,7 +712,11 @@ void SampleProfileLoader::findEquivalencesFor(
Weight = std::max(Weight, BlockWeights[BB2]);
}
}
BlockWeights[EC] = Weight;
if (EC == &EC->getParent()->getEntryBlock()) {
BlockWeights[EC] = Samples->getHeadSamples() + 1;
} else {
BlockWeights[EC] = Weight;
}
}
/// \brief Find equivalence classes.
@ -798,9 +807,12 @@ uint64_t SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
/// count of the basic block, if needed.
///
/// \param F Function to process.
/// \param UpdateBlockCount Whether we should update basic block counts that
/// has already been annotated.
///
/// \returns True if new weights were assigned to edges or blocks.
bool SampleProfileLoader::propagateThroughEdges(Function &F) {
bool SampleProfileLoader::propagateThroughEdges(Function &F,
bool UpdateBlockCount) {
bool Changed = false;
DEBUG(dbgs() << "\nPropagation through edges\n");
for (const auto &BI : F) {
@ -892,11 +904,35 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
else
EdgeWeights[UnknownEdge] = 0;
const BasicBlock *OtherEC;
if (i == 0)
OtherEC = EquivalenceClass[UnknownEdge.first];
else
OtherEC = EquivalenceClass[UnknownEdge.second];
// Edge weights should never exceed the BB weights it connects.
if (VisitedBlocks.count(OtherEC) &&
EdgeWeights[UnknownEdge] > BlockWeights[OtherEC])
EdgeWeights[UnknownEdge] = BlockWeights[OtherEC];
VisitedEdges.insert(UnknownEdge);
Changed = true;
DEBUG(dbgs() << "Set weight for edge: ";
printEdgeWeight(dbgs(), UnknownEdge));
}
} else if (VisitedBlocks.count(EC) && BlockWeights[EC] == 0) {
// If a block Weights 0, all its in/out edges should weight 0.
if (i == 0) {
for (auto *Pred : Predecessors[BB]) {
Edge E = std::make_pair(Pred, BB);
EdgeWeights[E] = 0;
VisitedEdges.insert(E);
}
} else {
for (auto *Succ : Successors[BB]) {
Edge E = std::make_pair(BB, Succ);
EdgeWeights[E] = 0;
VisitedEdges.insert(E);
}
}
} else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
uint64_t &BBWeight = BlockWeights[BB];
// We have a self-referential edge and the weight of BB is known.
@ -909,6 +945,11 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
DEBUG(dbgs() << "Set self-referential edge weight to: ";
printEdgeWeight(dbgs(), SelfReferentialEdge));
}
if (UpdateBlockCount && !VisitedBlocks.count(EC) && TotalWeight > 0) {
BlockWeights[EC] = TotalWeight;
VisitedBlocks.insert(EC);
Changed = true;
}
}
}
@ -968,7 +1009,21 @@ void SampleProfileLoader::propagateWeights(Function &F) {
// Add an entry count to the function using the samples gathered
// at the function entry.
F.setEntryCount(Samples->getHeadSamples());
F.setEntryCount(Samples->getHeadSamples() + 1);
// If BB weight is larger than its corresponding loop's header BB weight,
// use the BB weight to replace the loop header BB weight.
for (auto &BI : F) {
BasicBlock *BB = &BI;
Loop *L = LI->getLoopFor(BB);
if (!L) {
continue;
}
BasicBlock *Header = L->getHeader();
if (Header && BlockWeights[BB] > BlockWeights[Header]) {
BlockWeights[Header] = BlockWeights[BB];
}
}
// Before propagation starts, build, for each block, a list of
// unique predecessors and successors. This is necessary to handle
@ -979,7 +1034,23 @@ void SampleProfileLoader::propagateWeights(Function &F) {
// Propagate until we converge or we go past the iteration limit.
while (Changed && I++ < SampleProfileMaxPropagateIterations) {
Changed = propagateThroughEdges(F);
Changed = propagateThroughEdges(F, false);
}
// The first propagation propagates BB counts from annotated BBs to unknown
// BBs. The 2nd propagation pass resets edges weights, and use all BB weights
// to propagate edge weights.
VisitedEdges.clear();
Changed = true;
while (Changed && I++ < SampleProfileMaxPropagateIterations) {
Changed = propagateThroughEdges(F, false);
}
// The 3rd propagation pass allows adjust annotated BB weights that are
// obviously wrong.
Changed = true;
while (Changed && I++ < SampleProfileMaxPropagateIterations) {
Changed = propagateThroughEdges(F, true);
}
// Generate MD_prof metadata for every branch instruction using the
@ -1025,7 +1096,9 @@ void SampleProfileLoader::propagateWeights(Function &F) {
DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
Weight = std::numeric_limits<uint32_t>::max();
}
Weights.push_back(static_cast<uint32_t>(Weight));
// Weight is added by one to avoid propagation errors introduced by
// 0 weights.
Weights.push_back(static_cast<uint32_t>(Weight + 1));
if (Weight != 0) {
if (Weight > MaxWeight) {
MaxWeight = Weight;
@ -1036,21 +1109,17 @@ void SampleProfileLoader::propagateWeights(Function &F) {
// Only set weights if there is at least one non-zero weight.
// In any other case, let the analyzer set weights.
if (MaxWeight > 0) {
DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
TI->setMetadata(llvm::LLVMContext::MD_prof,
MDB.createBranchWeights(Weights));
DebugLoc BranchLoc = TI->getDebugLoc();
emitOptimizationRemark(
Ctx, DEBUG_TYPE, F, MaxDestLoc,
Twine("most popular destination for conditional branches at ") +
((BranchLoc) ? Twine(BranchLoc->getFilename() + ":" +
Twine(BranchLoc.getLine()) + ":" +
Twine(BranchLoc.getCol()))
: Twine("<UNKNOWN LOCATION>")));
} else {
DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
}
DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
TI->setMetadata(llvm::LLVMContext::MD_prof,
MDB.createBranchWeights(Weights));
DebugLoc BranchLoc = TI->getDebugLoc();
emitOptimizationRemark(
Ctx, DEBUG_TYPE, F, MaxDestLoc,
Twine("most popular destination for conditional branches at ") +
((BranchLoc) ? Twine(BranchLoc->getFilename() + ":" +
Twine(BranchLoc.getLine()) + ":" +
Twine(BranchLoc.getCol()))
: Twine("<UNKNOWN LOCATION>")));
}
}

View File

@ -1,4 +1,4 @@
main:15680:0
main:15680:2500
1: 2500
4: 1000
5: 1000

View File

@ -3,6 +3,7 @@ _Z3fooi:7711:610
_Z3bari:20301:1437
1: 1437
main:184019:0
3: 0
4: 534
6: 2080
9: 2064 _Z3bari:1471 _Z3fooi:631

View File

@ -49,8 +49,8 @@ entry:
%0 = load i32, i32* %argc.addr, align 4, !dbg !21
%cmp = icmp slt i32 %0, 2, !dbg !23
br i1 %cmp, label %if.then, label %if.end, !dbg !24
; CHECK: edge entry -> if.then probability is 0x4ccccccd / 0x80000000 = 60.00%
; CHECK: edge entry -> if.end probability is 0x33333333 / 0x80000000 = 40.00%
; CHECK: edge entry -> if.then probability is 0x4ccf6b16 / 0x80000000 = 60.01%
; CHECK: edge entry -> if.end probability is 0x333094ea / 0x80000000 = 39.99%
if.then: ; preds = %entry
store i32 1, i32* %retval, align 4, !dbg !25
@ -67,8 +67,8 @@ if.end: ; preds = %entry
%3 = load i32, i32* %limit, align 4, !dbg !32
%cmp1 = icmp sgt i32 %3, 100, !dbg !34
br i1 %cmp1, label %if.then.2, label %if.else, !dbg !35
; CHECK: edge if.end -> if.then.2 probability is 0x66666666 / 0x80000000 = 80.00%
; CHECK: edge if.end -> if.else probability is 0x1999999a / 0x80000000 = 20.00%
; CHECK: edge if.end -> if.then.2 probability is 0x6652c748 / 0x80000000 = 79.94%
; CHECK: edge if.end -> if.else probability is 0x19ad38b8 / 0x80000000 = 20.06%
if.then.2: ; preds = %if.end
call void @llvm.dbg.declare(metadata double* %s, metadata !36, metadata !17), !dbg !38

View File

@ -48,8 +48,8 @@ while.cond: ; preds = %if.end, %entry
store i32 %inc, i32* %i, align 4, !dbg !14
%cmp = icmp slt i32 %0, 400000000, !dbg !14
br i1 %cmp, label %while.body, label %while.end, !dbg !14
; CHECK: edge while.cond -> while.body probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
; CHECK: edge while.cond -> while.end probability is 0x00000000 / 0x80000000 = 0.00%
; CHECK: edge while.cond -> while.body probability is 0x7ffa4e20 / 0x80000000 = 99.98% [HOT edge]
; CHECK: edge while.cond -> while.end probability is 0x0005b1e0 / 0x80000000 = 0.02%
while.body: ; preds = %while.cond
%1 = load i32, i32* %i, align 4, !dbg !16
@ -59,8 +59,8 @@ while.body: ; preds = %while.cond
; both branches out of while.body had the same weight. In reality,
; the edge while.body->if.then is taken most of the time.
;
; CHECK: edge while.body -> if.else probability is 0x00000000 / 0x80000000 = 0.00%
; CHECK: edge while.body -> if.then probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
; CHECK: edge while.body -> if.else probability is 0x0005b1e0 / 0x80000000 = 0.02%
; CHECK: edge while.body -> if.then probability is 0x7ffa4e20 / 0x80000000 = 99.98% [HOT edge]
if.then: ; preds = %while.body

View File

@ -35,15 +35,15 @@ while.cond: ; preds = %if.end, %entry
%0 = load i32, i32* %i.addr, align 4, !dbg !12
%cmp = icmp slt i32 %0, 100, !dbg !12
br i1 %cmp, label %while.body, label %while.end, !dbg !12
; CHECK: edge while.cond -> while.body probability is 0x7ebb907a / 0x80000000 = 99.01% [HOT edge]
; CHECK: edge while.cond -> while.end probability is 0x01446f86 / 0x80000000 = 0.99%
; CHECK: edge while.cond -> while.body probability is 0x7d83ba68 / 0x80000000 = 98.06% [HOT edge]
; CHECK: edge while.cond -> while.end probability is 0x027c4598 / 0x80000000 = 1.94%
while.body: ; preds = %while.cond
%1 = load i32, i32* %i.addr, align 4, !dbg !14
%cmp1 = icmp slt i32 %1, 50, !dbg !14
br i1 %cmp1, label %if.then, label %if.end, !dbg !14
; CHECK: edge while.body -> if.then probability is 0x06666666 / 0x80000000 = 5.00%
; CHECK: edge while.body -> if.end probability is 0x7999999a / 0x80000000 = 95.00% [HOT edge]
; CHECK: edge while.body -> if.then probability is 0x07878788 / 0x80000000 = 5.88%
; CHECK: edge while.body -> if.end probability is 0x78787878 / 0x80000000 = 94.12% [HOT edge]
if.then: ; preds = %while.body
%2 = load i32, i32* %x, align 4, !dbg !17

View File

@ -2,7 +2,7 @@
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/entry_counts.prof -S | FileCheck %s
; According to the profile, function empty() was called 13,293 times.
; CHECK: {{.*}} = !{!"function_entry_count", i64 13293}
; CHECK: {{.*}} = !{!"function_entry_count", i64 13294}
define void @empty() !dbg !4 {
entry:

View File

@ -8,12 +8,12 @@
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/fnptr.prof | opt -analyze -branch-prob | FileCheck %s
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/fnptr.binprof | opt -analyze -branch-prob | FileCheck %s
; CHECK: edge for.body3 -> if.then probability is 0x1a4f3959 / 0x80000000 = 20.55%
; CHECK: edge for.body3 -> if.else probability is 0x65b0c6a7 / 0x80000000 = 79.45%
; CHECK: edge for.inc -> for.inc12 probability is 0x20dc8dc9 / 0x80000000 = 25.67%
; CHECK: edge for.inc -> for.body3 probability is 0x5f237237 / 0x80000000 = 74.33%
; CHECK: edge for.inc12 -> for.end14 probability is 0x00000000 / 0x80000000 = 0.00%
; CHECK: edge for.inc12 -> for.cond1.preheader probability is 0x80000000 / 0x80000000 = 100.00%
; CHECK: edge for.body3 -> if.then probability is 0x1a56a56a / 0x80000000 = 20.58%
; CHECK: edge for.body3 -> if.else probability is 0x65a95a96 / 0x80000000 = 79.42%
; CHECK: edge for.inc -> for.inc12 probability is 0x000fdc50 / 0x80000000 = 0.05%
; CHECK: edge for.inc -> for.body3 probability is 0x7ff023b0 / 0x80000000 = 99.95%
; CHECK: edge for.inc12 -> for.end14 probability is 0x40000000 / 0x80000000 = 50.00%
; CHECK: edge for.inc12 -> for.cond1.preheader probability is 0x40000000 / 0x80000000 = 50.00%
; Original C++ test case.
;

View File

@ -29,8 +29,8 @@ entry:
%0 = load i32, i32* %a.addr, align 4, !dbg !14
%cmp = icmp sgt i32 %0, 0, !dbg !18
br i1 %cmp, label %if.then, label %if.else, !dbg !19
; CHECK: edge entry -> if.then probability is 0x0147ae14 / 0x80000000 = 1.00%
; CHECK: edge entry -> if.else probability is 0x7eb851ec / 0x80000000 = 99.00% [HOT edge]
; CHECK: edge entry -> if.then probability is 0x0167ba82 / 0x80000000 = 1.10%
; CHECK: edge entry -> if.else probability is 0x7e98457e / 0x80000000 = 98.90% [HOT edge]
if.then: ; preds = %entry
store i32 10, i32* %retval, align 4, !dbg !20

View File

@ -85,8 +85,8 @@ for.body: ; preds = %for.cond
%div = sdiv i64 %7, 3, !dbg !43
%cmp2 = icmp sgt i64 %6, %div, !dbg !44
br i1 %cmp2, label %if.then3, label %if.end, !dbg !45
; CHECK: edge for.body -> if.then3 probability is 0x51451451 / 0x80000000 = 63.49%
; CHECK: edge for.body -> if.end probability is 0x2ebaebaf / 0x80000000 = 36.51%
; CHECK: edge for.body -> if.then3 probability is 0x51292fa6 / 0x80000000 = 63.41%
; CHECK: edge for.body -> if.end probability is 0x2ed6d05a / 0x80000000 = 36.59%
if.then3: ; preds = %for.body
%8 = load i32, i32* %x.addr, align 4, !dbg !46
@ -100,8 +100,8 @@ if.end: ; preds = %if.then3, %for.body
%div4 = sdiv i64 %10, 4, !dbg !51
%cmp5 = icmp sgt i64 %9, %div4, !dbg !52
br i1 %cmp5, label %if.then6, label %if.else7, !dbg !53
; CHECK: edge if.end -> if.then6 probability is 0x5dbaa1dc / 0x80000000 = 73.23%
; CHECK: edge if.end -> if.else7 probability is 0x22455e24 / 0x80000000 = 26.77%
; CHECK: edge if.end -> if.then6 probability is 0x5d89d89e / 0x80000000 = 73.08%
; CHECK: edge if.end -> if.else7 probability is 0x22762762 / 0x80000000 = 26.92%
if.then6: ; preds = %if.end
%11 = load i32, i32* %y.addr, align 4, !dbg !54
@ -121,8 +121,8 @@ for.cond8: ; preds = %for.inc, %if.else7
%13 = load i64, i64* %j, align 8, !dbg !64
%cmp9 = icmp slt i64 %13, 100, !dbg !67
br i1 %cmp9, label %for.body10, label %for.end, !dbg !68
; CHECK: edge for.cond8 -> for.body10 probability is 0x7e985735 / 0x80000000 = 98.90% [HOT edge]
; CHECK: edge for.cond8 -> for.end probability is 0x0167a8cb / 0x80000000 = 1.10%
; CHECK: edge for.cond8 -> for.body10 probability is 0x7e941a89 / 0x80000000 = 98.89% [HOT edge]
; CHECK: edge for.cond8 -> for.end probability is 0x016be577 / 0x80000000 = 1.11%
for.body10: ; preds = %for.cond8