[PGO] Handle cases of non-instrument BBs

As shown in PR41279, some basic blocks (such as catchswitch) cannot be
instrumented. This patch filters out these BBs in PGO instrumentation.
It also sets the profile count to the fail-to-instrument edge, so that we
can propagate the counts in the CFG.

Differential Revision: https://reviews.llvm.org/D62700

llvm-svn: 362995
This commit is contained in:
Rong Xu 2019-06-10 22:36:27 +00:00
parent c867b92217
commit e44fa83c37
5 changed files with 192 additions and 44 deletions

View File

@ -544,6 +544,12 @@ struct BBInfo {
const std::string infoString() const {
return (Twine("Index=") + Twine(Index)).str();
}
// Empty function -- only applicable to UseBBInfo.
void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
// Empty function -- only applicable to UseBBInfo.
void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
};
// This class implements the CFG edges. Note the CFG can be a multi-graph.
@ -748,7 +754,7 @@ void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
}
// Collect all the BBs that will be instruments and return them in
// InstrumentBBs.
// InstrumentBBs and setup InEdges/OutEdge for UseBBInfo.
template <class Edge, class BBInfo>
void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
std::vector<BasicBlock *> &InstrumentBBs) {
@ -763,6 +769,18 @@ void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
if (InstrBB)
InstrumentBBs.push_back(InstrBB);
}
// Set up InEdges/OutEdges for all BBs.
for (auto &E : MST.AllEdges) {
if (E->Removed)
continue;
const BasicBlock *SrcBB = E->SrcBB;
const BasicBlock *DestBB = E->DestBB;
BBInfo &SrcInfo = getBBInfo(SrcBB);
BBInfo &DestInfo = getBBInfo(DestBB);
SrcInfo.addOutEdge(E.get());
DestInfo.addInEdge(E.get());
}
}
// Given a CFG E to be instrumented, find which BB to place the instrumented
@ -780,19 +798,25 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
if (DestBB == nullptr)
return SrcBB;
auto canInstrument = [this](BasicBlock *BB) -> BasicBlock * {
// There are basic blocks (such as catchswitch) cannot be instrumented.
// If the returned first insertion point is the end of BB, skip this BB.
if (BB->getFirstInsertionPt() == BB->end()) {
LLVM_DEBUG(dbgs() << "Cannot instrument BB index=" << getBBInfo(BB).Index
<< "\n");
return nullptr;
}
return BB;
};
// Instrument the SrcBB if it has a single successor,
// otherwise, the DestBB if this is not a critical edge.
Instruction *TI = SrcBB->getTerminator();
if (TI->getNumSuccessors() <= 1)
return SrcBB;
return canInstrument(SrcBB);
if (!E->IsCritical)
return DestBB;
return canInstrument(DestBB);
// For a critical edge, we have to split. Instrument the newly
// created BB.
IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
<< " --> " << getBBInfo(DestBB).Index << "\n");
unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
if (!InstrBB) {
@ -800,6 +824,11 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
dbgs() << "Fail to split critical edge: not instrument this edge.\n");
return nullptr;
}
// For a critical edge, we have to split. Instrument the newly
// created BB.
IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
<< " --> " << getBBInfo(DestBB).Index << "\n");
// Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
MST.addEdge(SrcBB, InstrBB, 0);
// Second one: Add new edge of InstrBB->DestBB.
@ -807,7 +836,7 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
NewEdge1.InMST = true;
E->Removed = true;
return InstrBB;
return canInstrument(InstrBB);
}
// Visit all edge and instrument the edges not in MST, and do value profiling.
@ -925,6 +954,18 @@ struct UseBBInfo : public BBInfo {
return BBInfo::infoString();
return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str();
}
// Add an OutEdge and update the edge count.
void addOutEdge(PGOUseEdge *E) {
OutEdges.push_back(E);
UnknownCountOutEdge++;
}
// Add an InEdge and update the edge count.
void addInEdge(PGOUseEdge *E) {
InEdges.push_back(E);
UnknownCountInEdge++;
}
};
} // end anonymous namespace
@ -1069,24 +1110,50 @@ bool PGOUseFunc::setInstrumentedCounts(
if (NumCounters != CountFromProfile.size()) {
return false;
}
// Set the profile count to the Instrumented BBs.
uint32_t I = 0;
for (BasicBlock *InstrBB : InstrumentBBs) {
uint64_t CountValue = CountFromProfile[I++];
UseBBInfo &Info = getBBInfo(InstrBB);
Info.setBBInfoCount(CountValue);
// If only one in-edge, the edge profile count should be the same as BB
// profile count.
if (Info.InEdges.size() == 1) {
Info.InEdges[0]->setEdgeCount(CountValue);
}
// If only one out-edge, the edge profile count should be the same as BB
// profile count.
if (Info.OutEdges.size() == 1) {
Info.OutEdges[0]->setEdgeCount(CountValue);
}
}
ProfileCountSize = CountFromProfile.size();
CountPosition = I;
// Set the edge count and update the count of unknown edges for BBs.
auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
E->setEdgeCount(Value);
this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
this->getBBInfo(E->DestBB).UnknownCountInEdge--;
};
// Set the profile count the Instrumented edges. There are BBs that not in
// MST but not instrumented. Need to set the edge count value so that we can
// populate the profile counts later.
for (auto &E : FuncInfo.MST.AllEdges) {
if (E->Removed || E->InMST)
continue;
const BasicBlock *SrcBB = E->SrcBB;
UseBBInfo &SrcInfo = getBBInfo(SrcBB);
// If only one out-edge, the edge profile count should be the same as BB
// profile count.
if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1)
setEdgeCount(E.get(), SrcInfo.CountValue);
else {
const BasicBlock *DestBB = E->DestBB;
UseBBInfo &DestInfo = getBBInfo(DestBB);
// If only one in-edge, the edge profile count should be the same as BB
// profile count.
if (DestInfo.CountValid && DestInfo.InEdges.size() == 1)
setEdgeCount(E.get(), DestInfo.CountValue);
}
if (E->CountValid)
continue;
// E's count should have been set from profile. If not, this meenas E skips
// the instrumentation. We set the count to 0.
setEdgeCount(E.get(), 0);
}
return true;
}
@ -1180,26 +1247,6 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros)
// Populate the counters from instrumented BBs to all BBs.
// In the end of this operation, all BBs should have a valid count value.
void PGOUseFunc::populateCounters() {
// First set up Count variable for all BBs.
for (auto &E : FuncInfo.MST.AllEdges) {
if (E->Removed)
continue;
const BasicBlock *SrcBB = E->SrcBB;
const BasicBlock *DestBB = E->DestBB;
UseBBInfo &SrcInfo = getBBInfo(SrcBB);
UseBBInfo &DestInfo = getBBInfo(DestBB);
SrcInfo.OutEdges.push_back(E.get());
DestInfo.InEdges.push_back(E.get());
SrcInfo.UnknownCountOutEdge++;
DestInfo.UnknownCountInEdge++;
if (!E->CountValid)
continue;
DestInfo.UnknownCountInEdge--;
SrcInfo.UnknownCountOutEdge--;
}
bool Changes = true;
unsigned NumPasses = 0;
while (Changes) {

View File

@ -0,0 +1,9 @@
:ir
foo
60927483247
4
3
2
3
2

View File

@ -0,0 +1,7 @@
:ir
f
62077759478
2
3
2

View File

@ -1,6 +1,9 @@
; Test that instrumentaiton works fine for the case of failing the split critical edges.
; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
; RUN: llvm-profdata merge %S/Inputs/PR41279.proftext -o %t.profdata
; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
declare void @f3({ i8*, i64 }*, { i8*, i64 }*, i64)
declare { i8*, i64 } @f0({ i8*, i64 }*)
@ -9,17 +12,22 @@ declare void @invok2({ i8*, i64 }*, i8* noalias readonly align 1, i64)
declare void @invok1({ i8*, i64 }*, { i8*, i64 }*, i64)
declare i32 @__CxxFrameHandler3(...)
define internal void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @__CxxFrameHandler3 {
define void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @__CxxFrameHandler3 {
; USE-LABEL: @foo
; USE-SAME: !prof ![[FUNC_ENTRY_COUNT:[0-9]+]]
%3 = alloca i8, align 1
store i8 0, i8* %3, align 1
%4 = call i64 @f1()
%5 = icmp ult i64 %4, 32
br i1 %5, label %7, label %13
; USE: br i1 %5, label %7, label %13
; USE-SAME: !prof ![[BW_ENTRY1:[0-9]+]]
6:
cleanupret from %17 unwind to caller
; GEN: 6:
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 2)
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 60927483247, i32 4, i32 2)
7:
store i8 1, i8* %3, align 1
@ -42,13 +50,13 @@ define internal void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @
store i8 0, i8* %3, align 1
br label %14
; GEN: 12:
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 1)
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 60927483247, i32 4, i32 1)
13:
call void @f3({ i8*, i64 }* %0, { i8*, i64 }* %1, i64 1)
br label %14
; GEN: 13:
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 0)
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 60927483247, i32 4, i32 0)
14:
ret void
@ -57,11 +65,20 @@ define internal void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @
store i8 0, i8* %3, align 1
br label %6
; GEN: 15:
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 3)
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 60927483247, i32 4, i32 3)
16:
%17 = cleanuppad within none []
%18 = load i8, i8* %3, align 1
%19 = trunc i8 %18 to i1
br i1 %19, label %15, label %6
; USE: br i1 %19, label %15, label %6
; USE-SAME: !prof ![[BW_ENTRY2:[0-9]+]]
}
; USE-DAG: {{![0-9]+}} = !{i32 1, !"ProfileSummary", {{![0-9]+}}}
; USE-DAG: {{![0-9]+}} = !{!"DetailedSummary", {{![0-9]+}}}
; USE-DAG: ![[FUNC_ENTRY_COUNT]] = !{!"function_entry_count", i64 8}
; USE_DAG: ![[BW_ENTRY1]] = !{!"branch_weights", i32 5, i32 3}
; USE_DAG: ![[BW_ENTRY2]] = !{!"branch_weights", i32 2, i32 1}

View File

@ -0,0 +1,68 @@
; Test that instrumentaiton works fine for the case of catchswitch stmts.
; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
; RUN: llvm-profdata merge %S/Inputs/PR41279_2.proftext -o %t.profdata
; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
define dso_local void @f() personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) {
; USE-LABEL: @f
; USE-SAME: !prof ![[FUNC_ENTRY_COUNT:[0-9]+]]
; USE-DAG: {{![0-9]+}} = !{i32 1, !"ProfileSummary", {{![0-9]+}}}
; USE-DAG: {{![0-9]+}} = !{!"DetailedSummary", {{![0-9]+}}}
; USE-DAG: ![[FUNC_ENTRY_COUNT]] = !{!"function_entry_count", i64 5}
entry:
%__exception_code = alloca i32, align 4
%__exception_code2 = alloca i32, align 4
invoke void @f() #2
to label %invoke.cont unwind label %catch.dispatch
catch.dispatch:
%0 = catchswitch within none [label %__except] unwind to caller
__except:
%1 = catchpad within %0 [i8* null]
catchret from %1 to label %__except1
__except1:
%2 = call i32 @llvm.eh.exceptioncode(token %1)
store i32 %2, i32* %__exception_code, align 4
br label %__try.cont7
;GEN: _except1:
;GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__profn_f, i32 0, i32 0), i64 62077759478, i32 2, i32 1)
invoke.cont:
br label %__try.cont
__try.cont:
invoke void @f()
to label %invoke.cont3 unwind label %catch.dispatch4
catch.dispatch4:
%3 = catchswitch within none [label %__except5] unwind to caller
; GEN: catch.dispatch4:
; GEN-NOT: call void @llvm.instrprof.increment
__except5:
%4 = catchpad within %3 [i8* null]
catchret from %4 to label %__except6
__except6:
%5 = call i32 @llvm.eh.exceptioncode(token %4)
store i32 %5, i32* %__exception_code2, align 4
br label %__try.cont7
__try.cont7:
ret void
invoke.cont3:
br label %__try.cont7
;GEN: invoke.cont3:
;GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__profn_f, i32 0, i32 0), i64 62077759478, i32 2, i32 0)
}
declare dso_local i32 @__C_specific_handler(...)
declare i32 @llvm.eh.exceptioncode(token)