forked from OSchip/llvm-project
SamplePGO: convert callsite samples map key from callsite_location to callsite_location+callee_name
Summary: For iterative SamplePGO, an indirect call can be speculatively promoted to multiple direct calls and get inlined. All these promoted direct calls will share the same callsite location (offset+discriminator). With the current implementation, we cannot distinguish between the different promotion candidates and their inlined instances. This patch adds callee_name to the key of the callsite sample map, and adds helper functions to get all inlined callee samples for a given callsite location. This helps the profile annotator promote the correct targets and inline them before annotation, and ensures that all indirect call targets are annotated correctly. Reviewers: davidxl, dnovillo Reviewed By: davidxl Subscribers: andreadb, llvm-commits Differential Revision: https://reviews.llvm.org/D31950 llvm-svn: 300240
This commit is contained in:
parent
a80f2041f7
commit
2c7ca9b5df
|
@ -184,7 +184,8 @@ raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample);
|
|||
|
||||
typedef std::map<LineLocation, SampleRecord> BodySampleMap;
|
||||
class FunctionSamples;
|
||||
typedef std::map<LineLocation, FunctionSamples> CallsiteSampleMap;
|
||||
typedef StringMap<FunctionSamples> FunctionSamplesMap;
|
||||
typedef std::map<LineLocation, FunctionSamplesMap> CallsiteSampleMap;
|
||||
|
||||
/// Representation of the samples collected for a function.
|
||||
///
|
||||
|
@ -252,18 +253,41 @@ public:
|
|||
}
|
||||
|
||||
/// Return the function samples at the given callsite location.
|
||||
FunctionSamples &functionSamplesAt(const LineLocation &Loc) {
|
||||
FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) {
|
||||
return CallsiteSamples[Loc];
|
||||
}
|
||||
|
||||
/// Return a pointer to function samples at the given callsite location.
|
||||
const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc) const {
|
||||
/// Returns the FunctionSamplesMap at the given \p Loc.
|
||||
const FunctionSamplesMap *
|
||||
findFunctionSamplesMapAt(const LineLocation &Loc) const {
|
||||
auto iter = CallsiteSamples.find(Loc);
|
||||
if (iter == CallsiteSamples.end()) {
|
||||
if (iter == CallsiteSamples.end())
|
||||
return nullptr;
|
||||
} else {
|
||||
return &iter->second;
|
||||
}
|
||||
return &iter->second;
|
||||
}
|
||||
|
||||
/// Returns a pointer to FunctionSamples at the given callsite location \p Loc
|
||||
/// with callee \p CalleeName. If no callsite can be found, relax the
|
||||
/// restriction to return the FunctionSamples at callsite location \p Loc
|
||||
/// with the maximum total sample count.
|
||||
const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc,
|
||||
StringRef CalleeName) const {
|
||||
auto iter = CallsiteSamples.find(Loc);
|
||||
if (iter == CallsiteSamples.end())
|
||||
return nullptr;
|
||||
auto FS = iter->second.find(CalleeName);
|
||||
if (FS != iter->second.end())
|
||||
return &FS->getValue();
|
||||
// If we cannot find exact match of the callee name, return the FS with
|
||||
// the max total count.
|
||||
uint64_t MaxTotalSamples = 0;
|
||||
const FunctionSamples *R = nullptr;
|
||||
for (const auto &NameFS : iter->second)
|
||||
if (NameFS.second.getTotalSamples() >= MaxTotalSamples) {
|
||||
MaxTotalSamples = NameFS.second.getTotalSamples();
|
||||
R = &NameFS.second;
|
||||
}
|
||||
return R;
|
||||
}
|
||||
|
||||
bool empty() const { return TotalSamples == 0; }
|
||||
|
@ -297,8 +321,9 @@ public:
|
|||
}
|
||||
for (const auto &I : Other.getCallsiteSamples()) {
|
||||
const LineLocation &Loc = I.first;
|
||||
const FunctionSamples &Rec = I.second;
|
||||
MergeResult(Result, functionSamplesAt(Loc).merge(Rec, Weight));
|
||||
FunctionSamplesMap &FSMap = functionSamplesAt(Loc);
|
||||
for (const auto &Rec : I.second)
|
||||
MergeResult(Result, FSMap[Rec.first()].merge(Rec.second, Weight));
|
||||
}
|
||||
return Result;
|
||||
}
|
||||
|
@ -314,7 +339,8 @@ public:
|
|||
if (!F || !F->getSubprogram())
|
||||
S.insert(Function::getGUID(Name));
|
||||
for (auto CS : CallsiteSamples)
|
||||
CS.second.findImportedFunctions(S, M, Threshold);
|
||||
for (const auto &NameFS : CS.second)
|
||||
NameFS.second.findImportedFunctions(S, M, Threshold);
|
||||
}
|
||||
|
||||
/// Set the name of the function.
|
||||
|
|
|
@ -129,12 +129,14 @@ void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const {
|
|||
OS.indent(Indent);
|
||||
if (!CallsiteSamples.empty()) {
|
||||
OS << "Samples collected in inlined callsites {\n";
|
||||
SampleSorter<LineLocation, FunctionSamples> SortedCallsiteSamples(
|
||||
SampleSorter<LineLocation, FunctionSamplesMap> SortedCallsiteSamples(
|
||||
CallsiteSamples);
|
||||
for (const auto &CS : SortedCallsiteSamples.get()) {
|
||||
OS.indent(Indent + 2);
|
||||
OS << CS->first << ": inlined callee: " << CS->second.getName() << ": ";
|
||||
CS->second.print(OS, Indent + 4);
|
||||
for (const auto &FS : CS->second) {
|
||||
OS.indent(Indent + 2);
|
||||
OS << CS->first << ": inlined callee: " << FS.second.getName() << ": ";
|
||||
FS.second.print(OS, Indent + 4);
|
||||
}
|
||||
}
|
||||
OS << "}\n";
|
||||
} else {
|
||||
|
|
|
@ -211,7 +211,7 @@ std::error_code SampleProfileReaderText::read() {
|
|||
InlineStack.pop_back();
|
||||
}
|
||||
FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
|
||||
LineLocation(LineOffset, Discriminator));
|
||||
LineLocation(LineOffset, Discriminator))[FName];
|
||||
FSamples.setName(FName);
|
||||
MergeResult(Result, FSamples.addTotalSamples(NumSamples));
|
||||
InlineStack.push_back(&FSamples);
|
||||
|
@ -363,8 +363,8 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
|
|||
if (std::error_code EC = FName.getError())
|
||||
return EC;
|
||||
|
||||
FunctionSamples &CalleeProfile =
|
||||
FProfile.functionSamplesAt(LineLocation(*LineOffset, *Discriminator));
|
||||
FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
|
||||
LineLocation(*LineOffset, *Discriminator))[*FName];
|
||||
CalleeProfile.setName(*FName);
|
||||
if (std::error_code EC = readProfile(CalleeProfile))
|
||||
return EC;
|
||||
|
@ -636,7 +636,7 @@ std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
|
|||
uint32_t LineOffset = Offset >> 16;
|
||||
uint32_t Discriminator = Offset & 0xffff;
|
||||
FProfile = &CallerProfile->functionSamplesAt(
|
||||
LineLocation(LineOffset, Discriminator));
|
||||
LineLocation(LineOffset, Discriminator))[Name];
|
||||
}
|
||||
FProfile->setName(Name);
|
||||
|
||||
|
|
|
@ -68,20 +68,21 @@ std::error_code SampleProfileWriterText::write(const FunctionSamples &S) {
|
|||
OS << "\n";
|
||||
}
|
||||
|
||||
SampleSorter<LineLocation, FunctionSamples> SortedCallsiteSamples(
|
||||
SampleSorter<LineLocation, FunctionSamplesMap> SortedCallsiteSamples(
|
||||
S.getCallsiteSamples());
|
||||
Indent += 1;
|
||||
for (const auto &I : SortedCallsiteSamples.get()) {
|
||||
LineLocation Loc = I->first;
|
||||
const FunctionSamples &CalleeSamples = I->second;
|
||||
OS.indent(Indent);
|
||||
if (Loc.Discriminator == 0)
|
||||
OS << Loc.LineOffset << ": ";
|
||||
else
|
||||
OS << Loc.LineOffset << "." << Loc.Discriminator << ": ";
|
||||
if (std::error_code EC = write(CalleeSamples))
|
||||
return EC;
|
||||
}
|
||||
for (const auto &I : SortedCallsiteSamples.get())
|
||||
for (const auto &FS : I->second) {
|
||||
LineLocation Loc = I->first;
|
||||
const FunctionSamples &CalleeSamples = FS.second;
|
||||
OS.indent(Indent);
|
||||
if (Loc.Discriminator == 0)
|
||||
OS << Loc.LineOffset << ": ";
|
||||
else
|
||||
OS << Loc.LineOffset << "." << Loc.Discriminator << ": ";
|
||||
if (std::error_code EC = write(CalleeSamples))
|
||||
return EC;
|
||||
}
|
||||
Indent -= 1;
|
||||
|
||||
return sampleprof_error::success;
|
||||
|
@ -109,11 +110,12 @@ void SampleProfileWriterBinary::addNames(const FunctionSamples &S) {
|
|||
}
|
||||
|
||||
// Recursively add all the names for inlined callsites.
|
||||
for (const auto &J : S.getCallsiteSamples()) {
|
||||
const FunctionSamples &CalleeSamples = J.second;
|
||||
addName(CalleeSamples.getName());
|
||||
addNames(CalleeSamples);
|
||||
}
|
||||
for (const auto &J : S.getCallsiteSamples())
|
||||
for (const auto &FS : J.second) {
|
||||
const FunctionSamples &CalleeSamples = FS.second;
|
||||
addName(CalleeSamples.getName());
|
||||
addNames(CalleeSamples);
|
||||
}
|
||||
}
|
||||
|
||||
std::error_code SampleProfileWriterBinary::writeHeader(
|
||||
|
@ -187,14 +189,15 @@ std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) {
|
|||
|
||||
// Recursively emit all the callsite samples.
|
||||
encodeULEB128(S.getCallsiteSamples().size(), OS);
|
||||
for (const auto &J : S.getCallsiteSamples()) {
|
||||
LineLocation Loc = J.first;
|
||||
const FunctionSamples &CalleeSamples = J.second;
|
||||
encodeULEB128(Loc.LineOffset, OS);
|
||||
encodeULEB128(Loc.Discriminator, OS);
|
||||
if (std::error_code EC = writeBody(CalleeSamples))
|
||||
return EC;
|
||||
}
|
||||
for (const auto &J : S.getCallsiteSamples())
|
||||
for (const auto &FS : J.second) {
|
||||
LineLocation Loc = J.first;
|
||||
const FunctionSamples &CalleeSamples = FS.second;
|
||||
encodeULEB128(Loc.LineOffset, OS);
|
||||
encodeULEB128(Loc.Discriminator, OS);
|
||||
if (std::error_code EC = writeBody(CalleeSamples))
|
||||
return EC;
|
||||
}
|
||||
|
||||
return sampleprof_error::success;
|
||||
}
|
||||
|
|
|
@ -162,6 +162,8 @@ protected:
|
|||
ErrorOr<uint64_t> getInstWeight(const Instruction &I);
|
||||
ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
|
||||
const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
|
||||
std::vector<const FunctionSamples *>
|
||||
findIndirectCallFunctionSamples(const Instruction &I) const;
|
||||
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
|
||||
bool inlineHotFunctions(Function &F,
|
||||
DenseSet<GlobalValue::GUID> &ImportGUIDs);
|
||||
|
@ -330,11 +332,12 @@ SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS) const {
|
|||
// If there are inlined callsites in this function, count the samples found
|
||||
// in the respective bodies. However, do not bother counting callees with 0
|
||||
// total samples, these are callees that were never invoked at runtime.
|
||||
for (const auto &I : FS->getCallsiteSamples()) {
|
||||
const FunctionSamples *CalleeSamples = &I.second;
|
||||
if (callsiteIsHot(FS, CalleeSamples))
|
||||
Count += countUsedRecords(CalleeSamples);
|
||||
}
|
||||
for (const auto &I : FS->getCallsiteSamples())
|
||||
for (const auto &J : I.second) {
|
||||
const FunctionSamples *CalleeSamples = &J.second;
|
||||
if (callsiteIsHot(FS, CalleeSamples))
|
||||
Count += countUsedRecords(CalleeSamples);
|
||||
}
|
||||
|
||||
return Count;
|
||||
}
|
||||
|
@ -347,11 +350,12 @@ SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS) const {
|
|||
unsigned Count = FS->getBodySamples().size();
|
||||
|
||||
// Only count records in hot callsites.
|
||||
for (const auto &I : FS->getCallsiteSamples()) {
|
||||
const FunctionSamples *CalleeSamples = &I.second;
|
||||
if (callsiteIsHot(FS, CalleeSamples))
|
||||
Count += countBodyRecords(CalleeSamples);
|
||||
}
|
||||
for (const auto &I : FS->getCallsiteSamples())
|
||||
for (const auto &J : I.second) {
|
||||
const FunctionSamples *CalleeSamples = &J.second;
|
||||
if (callsiteIsHot(FS, CalleeSamples))
|
||||
Count += countBodyRecords(CalleeSamples);
|
||||
}
|
||||
|
||||
return Count;
|
||||
}
|
||||
|
@ -366,11 +370,12 @@ SampleCoverageTracker::countBodySamples(const FunctionSamples *FS) const {
|
|||
Total += I.second.getSamples();
|
||||
|
||||
// Only count samples in hot callsites.
|
||||
for (const auto &I : FS->getCallsiteSamples()) {
|
||||
const FunctionSamples *CalleeSamples = &I.second;
|
||||
if (callsiteIsHot(FS, CalleeSamples))
|
||||
Total += countBodySamples(CalleeSamples);
|
||||
}
|
||||
for (const auto &I : FS->getCallsiteSamples())
|
||||
for (const auto &J : I.second) {
|
||||
const FunctionSamples *CalleeSamples = &J.second;
|
||||
if (callsiteIsHot(FS, CalleeSamples))
|
||||
Total += countBodySamples(CalleeSamples);
|
||||
}
|
||||
|
||||
return Total;
|
||||
}
|
||||
|
@ -559,12 +564,49 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
|
|||
if (!DIL) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
StringRef CalleeName;
|
||||
if (const CallInst *CI = dyn_cast<CallInst>(&Inst))
|
||||
if (Function *Callee = CI->getCalledFunction())
|
||||
CalleeName = Callee->getName();
|
||||
|
||||
const FunctionSamples *FS = findFunctionSamples(Inst);
|
||||
if (FS == nullptr)
|
||||
return nullptr;
|
||||
|
||||
return FS->findFunctionSamplesAt(
|
||||
LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()));
|
||||
LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()), CalleeName);
|
||||
}
|
||||
|
||||
/// Returns a vector of FunctionSamples that are the indirect call targets
|
||||
/// of \p Inst. The vector is sorted by the total number of samples.
|
||||
std::vector<const FunctionSamples *>
|
||||
SampleProfileLoader::findIndirectCallFunctionSamples(
|
||||
const Instruction &Inst) const {
|
||||
const DILocation *DIL = Inst.getDebugLoc();
|
||||
std::vector<const FunctionSamples *> R;
|
||||
|
||||
if (!DIL) {
|
||||
return R;
|
||||
}
|
||||
|
||||
const FunctionSamples *FS = findFunctionSamples(Inst);
|
||||
if (FS == nullptr)
|
||||
return R;
|
||||
|
||||
if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(
|
||||
LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()))) {
|
||||
if (M->size() == 0)
|
||||
return R;
|
||||
for (const auto &NameFS : *M) {
|
||||
R.push_back(&NameFS.second);
|
||||
}
|
||||
std::sort(R.begin(), R.end(),
|
||||
[](const FunctionSamples *L, const FunctionSamples *R) {
|
||||
return L->getTotalSamples() > R->getTotalSamples();
|
||||
});
|
||||
}
|
||||
return R;
|
||||
}
|
||||
|
||||
/// \brief Get the FunctionSamples for an instruction.
|
||||
|
@ -578,18 +620,23 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
|
|||
/// \returns the FunctionSamples pointer to the inlined instance.
|
||||
const FunctionSamples *
|
||||
SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
|
||||
SmallVector<LineLocation, 10> S;
|
||||
SmallVector<std::pair<LineLocation, StringRef>, 10> S;
|
||||
const DILocation *DIL = Inst.getDebugLoc();
|
||||
if (!DIL) {
|
||||
if (!DIL)
|
||||
return Samples;
|
||||
|
||||
const DILocation *PrevDIL = DIL;
|
||||
for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
|
||||
S.push_back(std::make_pair(
|
||||
LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()),
|
||||
PrevDIL->getScope()->getSubprogram()->getLinkageName()));
|
||||
PrevDIL = DIL;
|
||||
}
|
||||
for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt())
|
||||
S.push_back(LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()));
|
||||
if (S.size() == 0)
|
||||
return Samples;
|
||||
const FunctionSamples *FS = Samples;
|
||||
for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) {
|
||||
FS = FS->findFunctionSamplesAt(S[i]);
|
||||
FS = FS->findFunctionSamplesAt(S[i].first, S[i].second);
|
||||
}
|
||||
return FS;
|
||||
}
|
||||
|
@ -638,25 +685,27 @@ bool SampleProfileLoader::inlineHotFunctions(
|
|||
Function *CalledFunction = CallSite(I).getCalledFunction();
|
||||
Instruction *DI = I;
|
||||
if (!CalledFunction && !PromotedInsns.count(I) &&
|
||||
CallSite(I).isIndirectCall()) {
|
||||
auto CalleeFunctionName = findCalleeFunctionSamples(*I)->getName();
|
||||
const char *Reason = "Callee function not available";
|
||||
CalledFunction = F.getParent()->getFunction(CalleeFunctionName);
|
||||
if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) {
|
||||
// The indirect target was promoted and inlined in the profile, as a
|
||||
// result, we do not have profile info for the branch probability.
|
||||
// We set the probability to 80% taken to indicate that the static
|
||||
// call is likely taken.
|
||||
DI = dyn_cast<Instruction>(
|
||||
promoteIndirectCall(I, CalledFunction, 80, 100, false)
|
||||
->stripPointerCasts());
|
||||
PromotedInsns.insert(I);
|
||||
} else {
|
||||
DEBUG(dbgs() << "\nFailed to promote indirect call to "
|
||||
<< CalleeFunctionName << " because " << Reason << "\n");
|
||||
continue;
|
||||
CallSite(I).isIndirectCall())
|
||||
for (const auto *FS : findIndirectCallFunctionSamples(*I)) {
|
||||
auto CalleeFunctionName = FS->getName();
|
||||
const char *Reason = "Callee function not available";
|
||||
CalledFunction = F.getParent()->getFunction(CalleeFunctionName);
|
||||
if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) {
|
||||
// The indirect target was promoted and inlined in the profile, as a
|
||||
// result, we do not have profile info for the branch probability.
|
||||
// We set the probability to 80% taken to indicate that the static
|
||||
// call is likely taken.
|
||||
DI = dyn_cast<Instruction>(
|
||||
promoteIndirectCall(I, CalledFunction, 80, 100, false)
|
||||
->stripPointerCasts());
|
||||
PromotedInsns.insert(I);
|
||||
} else {
|
||||
DEBUG(dbgs() << "\nFailed to promote indirect call to "
|
||||
<< CalleeFunctionName << " because " << Reason
|
||||
<< "\n");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!CalledFunction || !CalledFunction->getSubprogram()) {
|
||||
findCalleeFunctionSamples(*I)->findImportedFunctions(
|
||||
ImportGUIDs, F.getParent(),
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
test:63067:0
|
||||
4: 3345 _Z3barv:1398 _Z3foov:2059
|
||||
test_inline:3000:0
|
||||
5: foo_inline:3000
|
||||
5: foo_inline1:3000
|
||||
1: 3000
|
||||
5: foo_inline2:4000
|
||||
1: 4000
|
||||
test_noinline:3000:0
|
||||
5: foo_noinline:3000
|
||||
1: 3000
|
||||
|
|
|
@ -16,10 +16,14 @@ define void @test_inline(i64* (i32*)*, i32* %x) !dbg !3 {
|
|||
%2 = alloca i64* (i32*)*
|
||||
store i64* (i32*)* %0, i64* (i32*)** %2
|
||||
%3 = load i64* (i32*)*, i64* (i32*)** %2
|
||||
; CHECK: icmp {{.*}} @foo_inline
|
||||
; CHECK: icmp {{.*}} @foo_inline2
|
||||
; CHECK: if.true.direct_targ:
|
||||
; CHECK-NOT: call
|
||||
; CHECK: if.false.orig_indirect:
|
||||
; CHECK: icmp {{.*}} @foo_inline1
|
||||
; CHECK: if.true.direct_targ1:
|
||||
; CHECK-NOT: call
|
||||
; CHECK: if.false.orig_indirect2:
|
||||
; CHECK: call
|
||||
call i64* %3(i32* %x), !dbg !5
|
||||
ret void
|
||||
|
@ -39,7 +43,11 @@ define void @test_noinline(void ()*) !dbg !3 {
|
|||
|
||||
@x = global i32 0, align 4
|
||||
|
||||
define i32* @foo_inline(i32* %x) !dbg !3 {
|
||||
define i32* @foo_inline1(i32* %x) !dbg !3 {
|
||||
ret i32* %x
|
||||
}
|
||||
|
||||
define i32* @foo_inline2(i32* %x) !dbg !3 {
|
||||
ret i32* %x
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue