SamplePGO: convert callsite samples map key from callsite_location to callsite_location+callee_name

Summary: For iterative SamplePGO, an indirect call can be speculatively promoted to multiple direct calls, each of which may then be inlined. All of these promoted direct calls share the same callsite location (offset+discriminator). With the current implementation, we cannot distinguish between the different promotion candidates and their inlined instances. This patch adds callee_name to the key of the callsite sample map, and adds helper functions to get all inlined callee samples for a given callsite location. This helps the profile annotator promote the correct targets and inline them before annotation, and ensures that all indirect call targets are annotated correctly.

Reviewers: davidxl, dnovillo

Reviewed By: davidxl

Subscribers: andreadb, llvm-commits

Differential Revision: https://reviews.llvm.org/D31950

llvm-svn: 300240
This commit is contained in:
Dehao Chen 2017-04-13 19:52:10 +00:00
parent a80f2041f7
commit 2c7ca9b5df
7 changed files with 176 additions and 86 deletions

View File

@ -184,7 +184,8 @@ raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample);
typedef std::map<LineLocation, SampleRecord> BodySampleMap;
class FunctionSamples;
typedef std::map<LineLocation, FunctionSamples> CallsiteSampleMap;
typedef StringMap<FunctionSamples> FunctionSamplesMap;
typedef std::map<LineLocation, FunctionSamplesMap> CallsiteSampleMap;
/// Representation of the samples collected for a function.
///
@ -252,18 +253,41 @@ public:
}
/// Return the function samples at the given callsite location.
FunctionSamples &functionSamplesAt(const LineLocation &Loc) {
FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) {
return CallsiteSamples[Loc];
}
/// Return a pointer to function samples at the given callsite location.
const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc) const {
/// Returns the FunctionSamplesMap at the given \p Loc.
const FunctionSamplesMap *
findFunctionSamplesMapAt(const LineLocation &Loc) const {
auto iter = CallsiteSamples.find(Loc);
if (iter == CallsiteSamples.end()) {
if (iter == CallsiteSamples.end())
return nullptr;
} else {
return &iter->second;
}
return &iter->second;
}
/// Looks up the inlined-callee profile recorded at callsite \p Loc for the
/// callee named \p CalleeName.
///
/// \returns nullptr when nothing was recorded at \p Loc. When samples exist
/// at \p Loc but none of them is keyed by \p CalleeName, the restriction is
/// relaxed and the callee profile at \p Loc with the largest total sample
/// count is returned instead.
const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc,
                                             StringRef CalleeName) const {
  auto CallsiteIt = CallsiteSamples.find(Loc);
  if (CallsiteIt == CallsiteSamples.end())
    return nullptr;

  const FunctionSamplesMap &Callees = CallsiteIt->second;
  auto CalleeIt = Callees.find(CalleeName);
  if (CalleeIt != Callees.end())
    return &CalleeIt->getValue();

  // No exact match on the callee name: fall back to the hottest callee.
  // An entry replaces the current pick unless it is strictly colder, so a
  // later entry wins ties and entries with zero samples are still eligible.
  const FunctionSamples *Hottest = nullptr;
  uint64_t HottestTotal = 0;
  for (const auto &Entry : Callees) {
    const uint64_t Total = Entry.second.getTotalSamples();
    if (Total < HottestTotal)
      continue;
    HottestTotal = Total;
    Hottest = &Entry.second;
  }
  return Hottest;
}
bool empty() const { return TotalSamples == 0; }
@ -297,8 +321,9 @@ public:
}
for (const auto &I : Other.getCallsiteSamples()) {
const LineLocation &Loc = I.first;
const FunctionSamples &Rec = I.second;
MergeResult(Result, functionSamplesAt(Loc).merge(Rec, Weight));
FunctionSamplesMap &FSMap = functionSamplesAt(Loc);
for (const auto &Rec : I.second)
MergeResult(Result, FSMap[Rec.first()].merge(Rec.second, Weight));
}
return Result;
}
@ -314,7 +339,8 @@ public:
if (!F || !F->getSubprogram())
S.insert(Function::getGUID(Name));
for (auto CS : CallsiteSamples)
CS.second.findImportedFunctions(S, M, Threshold);
for (const auto &NameFS : CS.second)
NameFS.second.findImportedFunctions(S, M, Threshold);
}
/// Set the name of the function.

View File

@ -129,12 +129,14 @@ void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const {
OS.indent(Indent);
if (!CallsiteSamples.empty()) {
OS << "Samples collected in inlined callsites {\n";
SampleSorter<LineLocation, FunctionSamples> SortedCallsiteSamples(
SampleSorter<LineLocation, FunctionSamplesMap> SortedCallsiteSamples(
CallsiteSamples);
for (const auto &CS : SortedCallsiteSamples.get()) {
OS.indent(Indent + 2);
OS << CS->first << ": inlined callee: " << CS->second.getName() << ": ";
CS->second.print(OS, Indent + 4);
for (const auto &FS : CS->second) {
OS.indent(Indent + 2);
OS << CS->first << ": inlined callee: " << FS.second.getName() << ": ";
FS.second.print(OS, Indent + 4);
}
}
OS << "}\n";
} else {

View File

@ -211,7 +211,7 @@ std::error_code SampleProfileReaderText::read() {
InlineStack.pop_back();
}
FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
LineLocation(LineOffset, Discriminator));
LineLocation(LineOffset, Discriminator))[FName];
FSamples.setName(FName);
MergeResult(Result, FSamples.addTotalSamples(NumSamples));
InlineStack.push_back(&FSamples);
@ -363,8 +363,8 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
if (std::error_code EC = FName.getError())
return EC;
FunctionSamples &CalleeProfile =
FProfile.functionSamplesAt(LineLocation(*LineOffset, *Discriminator));
FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
LineLocation(*LineOffset, *Discriminator))[*FName];
CalleeProfile.setName(*FName);
if (std::error_code EC = readProfile(CalleeProfile))
return EC;
@ -636,7 +636,7 @@ std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
uint32_t LineOffset = Offset >> 16;
uint32_t Discriminator = Offset & 0xffff;
FProfile = &CallerProfile->functionSamplesAt(
LineLocation(LineOffset, Discriminator));
LineLocation(LineOffset, Discriminator))[Name];
}
FProfile->setName(Name);

View File

@ -68,20 +68,21 @@ std::error_code SampleProfileWriterText::write(const FunctionSamples &S) {
OS << "\n";
}
SampleSorter<LineLocation, FunctionSamples> SortedCallsiteSamples(
SampleSorter<LineLocation, FunctionSamplesMap> SortedCallsiteSamples(
S.getCallsiteSamples());
Indent += 1;
for (const auto &I : SortedCallsiteSamples.get()) {
LineLocation Loc = I->first;
const FunctionSamples &CalleeSamples = I->second;
OS.indent(Indent);
if (Loc.Discriminator == 0)
OS << Loc.LineOffset << ": ";
else
OS << Loc.LineOffset << "." << Loc.Discriminator << ": ";
if (std::error_code EC = write(CalleeSamples))
return EC;
}
for (const auto &I : SortedCallsiteSamples.get())
for (const auto &FS : I->second) {
LineLocation Loc = I->first;
const FunctionSamples &CalleeSamples = FS.second;
OS.indent(Indent);
if (Loc.Discriminator == 0)
OS << Loc.LineOffset << ": ";
else
OS << Loc.LineOffset << "." << Loc.Discriminator << ": ";
if (std::error_code EC = write(CalleeSamples))
return EC;
}
Indent -= 1;
return sampleprof_error::success;
@ -109,11 +110,12 @@ void SampleProfileWriterBinary::addNames(const FunctionSamples &S) {
}
// Recursively add all the names for inlined callsites.
for (const auto &J : S.getCallsiteSamples()) {
const FunctionSamples &CalleeSamples = J.second;
addName(CalleeSamples.getName());
addNames(CalleeSamples);
}
for (const auto &J : S.getCallsiteSamples())
for (const auto &FS : J.second) {
const FunctionSamples &CalleeSamples = FS.second;
addName(CalleeSamples.getName());
addNames(CalleeSamples);
}
}
std::error_code SampleProfileWriterBinary::writeHeader(
@ -187,14 +189,15 @@ std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) {
// Recursively emit all the callsite samples.
encodeULEB128(S.getCallsiteSamples().size(), OS);
for (const auto &J : S.getCallsiteSamples()) {
LineLocation Loc = J.first;
const FunctionSamples &CalleeSamples = J.second;
encodeULEB128(Loc.LineOffset, OS);
encodeULEB128(Loc.Discriminator, OS);
if (std::error_code EC = writeBody(CalleeSamples))
return EC;
}
for (const auto &J : S.getCallsiteSamples())
for (const auto &FS : J.second) {
LineLocation Loc = J.first;
const FunctionSamples &CalleeSamples = FS.second;
encodeULEB128(Loc.LineOffset, OS);
encodeULEB128(Loc.Discriminator, OS);
if (std::error_code EC = writeBody(CalleeSamples))
return EC;
}
return sampleprof_error::success;
}

View File

@ -162,6 +162,8 @@ protected:
ErrorOr<uint64_t> getInstWeight(const Instruction &I);
ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
std::vector<const FunctionSamples *>
findIndirectCallFunctionSamples(const Instruction &I) const;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
bool inlineHotFunctions(Function &F,
DenseSet<GlobalValue::GUID> &ImportGUIDs);
@ -330,11 +332,12 @@ SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS) const {
// If there are inlined callsites in this function, count the samples found
// in the respective bodies. However, do not bother counting callees with 0
// total samples, these are callees that were never invoked at runtime.
for (const auto &I : FS->getCallsiteSamples()) {
const FunctionSamples *CalleeSamples = &I.second;
if (callsiteIsHot(FS, CalleeSamples))
Count += countUsedRecords(CalleeSamples);
}
for (const auto &I : FS->getCallsiteSamples())
for (const auto &J : I.second) {
const FunctionSamples *CalleeSamples = &J.second;
if (callsiteIsHot(FS, CalleeSamples))
Count += countUsedRecords(CalleeSamples);
}
return Count;
}
@ -347,11 +350,12 @@ SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS) const {
unsigned Count = FS->getBodySamples().size();
// Only count records in hot callsites.
for (const auto &I : FS->getCallsiteSamples()) {
const FunctionSamples *CalleeSamples = &I.second;
if (callsiteIsHot(FS, CalleeSamples))
Count += countBodyRecords(CalleeSamples);
}
for (const auto &I : FS->getCallsiteSamples())
for (const auto &J : I.second) {
const FunctionSamples *CalleeSamples = &J.second;
if (callsiteIsHot(FS, CalleeSamples))
Count += countBodyRecords(CalleeSamples);
}
return Count;
}
@ -366,11 +370,12 @@ SampleCoverageTracker::countBodySamples(const FunctionSamples *FS) const {
Total += I.second.getSamples();
// Only count samples in hot callsites.
for (const auto &I : FS->getCallsiteSamples()) {
const FunctionSamples *CalleeSamples = &I.second;
if (callsiteIsHot(FS, CalleeSamples))
Total += countBodySamples(CalleeSamples);
}
for (const auto &I : FS->getCallsiteSamples())
for (const auto &J : I.second) {
const FunctionSamples *CalleeSamples = &J.second;
if (callsiteIsHot(FS, CalleeSamples))
Total += countBodySamples(CalleeSamples);
}
return Total;
}
@ -559,12 +564,49 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
if (!DIL) {
return nullptr;
}
StringRef CalleeName;
if (const CallInst *CI = dyn_cast<CallInst>(&Inst))
if (Function *Callee = CI->getCalledFunction())
CalleeName = Callee->getName();
const FunctionSamples *FS = findFunctionSamples(Inst);
if (FS == nullptr)
return nullptr;
return FS->findFunctionSamplesAt(
LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()));
LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()), CalleeName);
}
/// Returns a vector of FunctionSamples that are the indirect call targets
/// of \p Inst. The vector is sorted by the total number of samples.
std::vector<const FunctionSamples *>
SampleProfileLoader::findIndirectCallFunctionSamples(
const Instruction &Inst) const {
const DILocation *DIL = Inst.getDebugLoc();
std::vector<const FunctionSamples *> R;
if (!DIL) {
return R;
}
const FunctionSamples *FS = findFunctionSamples(Inst);
if (FS == nullptr)
return R;
if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(
LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()))) {
if (M->size() == 0)
return R;
for (const auto &NameFS : *M) {
R.push_back(&NameFS.second);
}
std::sort(R.begin(), R.end(),
[](const FunctionSamples *L, const FunctionSamples *R) {
return L->getTotalSamples() > R->getTotalSamples();
});
}
return R;
}
/// \brief Get the FunctionSamples for an instruction.
@ -578,18 +620,23 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
/// \returns the FunctionSamples pointer to the inlined instance.
const FunctionSamples *
SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
SmallVector<LineLocation, 10> S;
SmallVector<std::pair<LineLocation, StringRef>, 10> S;
const DILocation *DIL = Inst.getDebugLoc();
if (!DIL) {
if (!DIL)
return Samples;
const DILocation *PrevDIL = DIL;
for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
S.push_back(std::make_pair(
LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()),
PrevDIL->getScope()->getSubprogram()->getLinkageName()));
PrevDIL = DIL;
}
for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt())
S.push_back(LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()));
if (S.size() == 0)
return Samples;
const FunctionSamples *FS = Samples;
for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) {
FS = FS->findFunctionSamplesAt(S[i]);
FS = FS->findFunctionSamplesAt(S[i].first, S[i].second);
}
return FS;
}
@ -638,25 +685,27 @@ bool SampleProfileLoader::inlineHotFunctions(
Function *CalledFunction = CallSite(I).getCalledFunction();
Instruction *DI = I;
if (!CalledFunction && !PromotedInsns.count(I) &&
CallSite(I).isIndirectCall()) {
auto CalleeFunctionName = findCalleeFunctionSamples(*I)->getName();
const char *Reason = "Callee function not available";
CalledFunction = F.getParent()->getFunction(CalleeFunctionName);
if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) {
// The indirect target was promoted and inlined in the profile, as a
// result, we do not have profile info for the branch probability.
// We set the probability to 80% taken to indicate that the static
// call is likely taken.
DI = dyn_cast<Instruction>(
promoteIndirectCall(I, CalledFunction, 80, 100, false)
->stripPointerCasts());
PromotedInsns.insert(I);
} else {
DEBUG(dbgs() << "\nFailed to promote indirect call to "
<< CalleeFunctionName << " because " << Reason << "\n");
continue;
CallSite(I).isIndirectCall())
for (const auto *FS : findIndirectCallFunctionSamples(*I)) {
auto CalleeFunctionName = FS->getName();
const char *Reason = "Callee function not available";
CalledFunction = F.getParent()->getFunction(CalleeFunctionName);
if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) {
// The indirect target was promoted and inlined in the profile, as a
// result, we do not have profile info for the branch probability.
// We set the probability to 80% taken to indicate that the static
// call is likely taken.
DI = dyn_cast<Instruction>(
promoteIndirectCall(I, CalledFunction, 80, 100, false)
->stripPointerCasts());
PromotedInsns.insert(I);
} else {
DEBUG(dbgs() << "\nFailed to promote indirect call to "
<< CalleeFunctionName << " because " << Reason
<< "\n");
continue;
}
}
}
if (!CalledFunction || !CalledFunction->getSubprogram()) {
findCalleeFunctionSamples(*I)->findImportedFunctions(
ImportGUIDs, F.getParent(),

View File

@ -1,8 +1,10 @@
test:63067:0
4: 3345 _Z3barv:1398 _Z3foov:2059
test_inline:3000:0
5: foo_inline:3000
5: foo_inline1:3000
1: 3000
5: foo_inline2:4000
1: 4000
test_noinline:3000:0
5: foo_noinline:3000
1: 3000

View File

@ -16,10 +16,14 @@ define void @test_inline(i64* (i32*)*, i32* %x) !dbg !3 {
%2 = alloca i64* (i32*)*
store i64* (i32*)* %0, i64* (i32*)** %2
%3 = load i64* (i32*)*, i64* (i32*)** %2
; CHECK: icmp {{.*}} @foo_inline
; CHECK: icmp {{.*}} @foo_inline2
; CHECK: if.true.direct_targ:
; CHECK-NOT: call
; CHECK: if.false.orig_indirect:
; CHECK: icmp {{.*}} @foo_inline1
; CHECK: if.true.direct_targ1:
; CHECK-NOT: call
; CHECK: if.false.orig_indirect2:
; CHECK: call
call i64* %3(i32* %x), !dbg !5
ret void
@ -39,7 +43,11 @@ define void @test_noinline(void ()*) !dbg !3 {
@x = global i32 0, align 4
define i32* @foo_inline(i32* %x) !dbg !3 {
define i32* @foo_inline1(i32* %x) !dbg !3 {
ret i32* %x
}
define i32* @foo_inline2(i32* %x) !dbg !3 {
ret i32* %x
}