[llvm-profgen] Decoding pseudo probe for profiled function only.

Complete pseudo probe decoding can result in large memory usage. In practice only a small portion of the decoded probes is used in profile generation. I'm changing the full decoding mode to decode probes for profiled functions only. We still do a full scan of the .pseudo_probe section because it has no table of contents, but we no longer have to build the in-memory data structure for functions that are not sampled.

To build the in-memory data structure for profiled functions only, I'm rewriting the previous non-recursive probe decoding logic to be recursive. The recursive version is easy to read and maintain.

I also have to change the previous representation of the unsymbolized context from a probe-based stack to an address-based stack, since the profiled functions are not yet known at the time of virtual unwinding. The address-based stack is converted to a probe-based stack after virtual unwinding and on-demand probe decoding.

I'm seeing 20GB of memory saved for one of our large internal services.

Reviewed By: wenlei

Differential Revision: https://reviews.llvm.org/D121643
This commit is contained in:
Hongtao Yu 2022-03-23 12:36:44 -07:00
parent d90a3fcacd
commit 3f97016857
10 changed files with 288 additions and 194 deletions

View File

@ -55,6 +55,7 @@
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
namespace llvm {
@ -353,6 +354,15 @@ public:
// Decode pseudo_probe section to build address to probes map.
bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size);
// Decode pseudo_probe section to build address to probes map for specified
// functions only.
bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size,
std::unordered_set<uint64_t> &GuildFilter);
bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur,
uint64_t &LastAddr,
std::unordered_set<uint64_t> &GuildFilter);
// Print pseudo_probe_desc section info
void printGUID2FuncDescMap(raw_ostream &OS);

View File

@ -358,8 +358,9 @@ bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start,
return true;
}
bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
std::size_t Size) {
bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
MCDecodedPseudoProbeInlineTree *Cur, uint64_t &LastAddr,
std::unordered_set<uint64_t> &GuildFilter) {
// The pseudo_probe section encodes an inline forest and each tree has a
// format like:
// FUNCTION BODY (one for each uninlined function present in the text
@ -390,18 +391,10 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
// FUNCTION BODY
// A FUNCTION BODY entry describing the inlined function.
Data = Start;
End = Data + Size;
MCDecodedPseudoProbeInlineTree *Root = &DummyInlineRoot;
MCDecodedPseudoProbeInlineTree *Cur = &DummyInlineRoot;
uint64_t LastAddr = 0;
uint32_t Index = 0;
// A DFS-based decoding
while (Data < End) {
if (Root == Cur) {
if (Cur == &DummyInlineRoot) {
// Use a sequential id for top level inliner.
Index = Root->getChildren().size();
Index = Cur->getChildren().size();
} else {
// Read inline site for inlinees
auto ErrorOrIndex = readUnsignedNumber<uint32_t>();
@ -409,13 +402,25 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
return false;
Index = std::move(*ErrorOrIndex);
}
// Switch/add to a new tree node(inlinee)
Cur = Cur->getOrAddNode(std::make_tuple(Cur->Guid, Index));
// Read guid
auto ErrorOrCurGuid = readUnencodedNumber<uint64_t>();
if (!ErrorOrCurGuid)
return false;
Cur->Guid = std::move(*ErrorOrCurGuid);
uint64_t Guid = std::move(*ErrorOrCurGuid);
// Decide if top-level node should be disgarded.
if (Cur == &DummyInlineRoot && !GuildFilter.empty() &&
!GuildFilter.count(Guid))
Cur = nullptr;
// If the incoming node is null, all its children nodes should be disgarded.
if (Cur) {
// Switch/add to a new tree node(inlinee)
Cur = Cur->getOrAddNode(std::make_tuple(Cur->Guid, Index));
Cur->Guid = Guid;
}
// Read number of probes in the current node.
auto ErrorOrNodeCount = readUnsignedNumber<uint32_t>();
if (!ErrorOrNodeCount)
@ -425,7 +430,6 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
auto ErrorOrCurChildrenToProcess = readUnsignedNumber<uint32_t>();
if (!ErrorOrCurChildrenToProcess)
return false;
Cur->ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess);
// Read all probes in this node
for (std::size_t I = 0; I < NodeCount; I++) {
// Read index
@ -454,37 +458,43 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
return false;
Addr = std::move(*ErrorOrAddr);
}
if (Cur) {
// Populate Address2ProbesMap
auto &Probes = Address2ProbesMap[Addr];
Probes.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind), Attr,
Cur);
Cur->addProbes(&Probes.back());
}
LastAddr = Addr;
}
// Look for the parent for the next node by subtracting the current
// node count from tree counts along the parent chain. The first node
// in the chain that has a non-zero tree count is the target.
while (Cur != Root) {
if (Cur->ChildrenToProcess == 0) {
Cur = static_cast<MCDecodedPseudoProbeInlineTree *>(Cur->Parent);
if (Cur != Root) {
assert(Cur->ChildrenToProcess > 0 &&
"Should have some unprocessed nodes");
Cur->ChildrenToProcess -= 1;
}
} else {
break;
}
}
uint32_t ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess);
for (uint32_t I = 0; I < ChildrenToProcess; I++) {
buildAddress2ProbeMap(Cur, LastAddr, GuildFilter);
}
assert(Data == End && "Have unprocessed data in pseudo_probe section");
assert(Cur == Root &&
" Cur should point to root when the forest is fully built up");
return true;
}
// Decode the pseudo_probe section held in [Start, Start + Size) one top-level
// inline tree at a time, attaching each tree to DummyInlineRoot.
//
// GuildFilter restricts which top-level functions are materialized; an empty
// filter keeps every function.
//
// Returns false as soon as decoding of any tree fails, true once the whole
// section has been consumed.
bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
    const uint8_t *Start, std::size_t Size,
    std::unordered_set<uint64_t> &GuildFilter) {
  Data = Start;
  End = Data + Size;
  uint64_t LastAddr = 0;
  // Propagate a failed decode instead of dropping it: the caller relies on the
  // return value to report a malformed section, and continuing after a failure
  // would keep re-entering the loop with no guarantee that Data still
  // advances.
  while (Data < End)
    if (!buildAddress2ProbeMap(&DummyInlineRoot, LastAddr, GuildFilter))
      return false;
  assert(Data == End && "Have unprocessed data in pseudo_probe section");
  return true;
}
// Unfiltered decoding entry point: forwards to the filtered overload with an
// empty GUID filter, which keeps every top-level function.
bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
                                                 std::size_t Size) {
  std::unordered_set<uint64_t> NoFilter;
  const bool Decoded = buildAddress2ProbeMap(Start, Size, NoFilter);
  return Decoded;
}
void MCPseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) {
OS << "Pseudo Probe Desc:\n";
// Make the output deterministic

View File

@ -24,14 +24,14 @@
; CHECK-NEXT: 4: 15
; CHECK-NEXT: !CFGChecksum: 72617220756
; CHECK-UNWINDER: [main:2]
; CHECK-UNWINDER: [0x7f4]
; CHECK-UNWINDER-NEXT: 2
; CHECK-UNWINDER-NEXT: 79e-7bf:15
; CHECK-UNWINDER-NEXT: 7c4-7cf:15
; CHECK-UNWINDER-NEXT: 2
; CHECK-UNWINDER-NEXT: 7bf->760:15
; CHECK-UNWINDER-NEXT: 7cf->79e:16
; CHECK-UNWINDER-NEXT: [main:2 @ foo:8]
; CHECK-UNWINDER-NEXT: [0x7f4 @ 0x7bf]
; CHECK-UNWINDER-NEXT: 1
; CHECK-UNWINDER-NEXT: 760-77f:15
; CHECK-UNWINDER-NEXT: 1

View File

@ -123,7 +123,7 @@
; CHECK: 6: 1 fa:1
; CHECK: !CFGChecksum: 563022570642068
; CHECK-UNWINDER: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5]
; CHECK-UNWINDER: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab]
; CHECK-UNWINDER-NEXT: 3
; CHECK-UNWINDER-NEXT: 7a0-7a7:1
; CHECK-UNWINDER-NEXT: 7a0-7ab:3
@ -132,33 +132,33 @@
; CHECK-UNWINDER-NEXT: 7a7->7b2:1
; CHECK-UNWINDER-NEXT: 7ab->7a0:4
; CHECK-UNWINDER-NEXT: 7b5->7c0:1
; CHECK-UNWINDER-NEXT: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6]
; CHECK-UNWINDER-NEXT: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab @ 0x7b5]
; CHECK-UNWINDER-NEXT: 1
; CHECK-UNWINDER-NEXT: 7c0-7d4:1
; CHECK-UNWINDER-NEXT: 1
; CHECK-UNWINDER-NEXT: 7d4->7c0:1
; CHECK-UNWINDER-NEXT: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8]
; CHECK-UNWINDER-NEXT: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab @ 0x7b5 @ 0x7d4]
; CHECK-UNWINDER-NEXT: 2
; CHECK-UNWINDER-NEXT: 7c0-7cd:1
; CHECK-UNWINDER-NEXT: 7db-7e0:1
; CHECK-UNWINDER-NEXT: 2
; CHECK-UNWINDER-NEXT: 7cd->7db:1
; CHECK-UNWINDER-NEXT: 7e0->7a0:1
; CHECK-UNWINDER-NEXT: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7]
; CHECK-UNWINDER-NEXT: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab @ 0x7b5 @ 0x7d4 @ 0x7e0]
; CHECK-UNWINDER-NEXT: 2
; CHECK-UNWINDER-NEXT: 7a0-7a7:1
; CHECK-UNWINDER-NEXT: 7b2-7b5:1
; CHECK-UNWINDER-NEXT: 2
; CHECK-UNWINDER-NEXT: 7a7->7b2:1
; CHECK-UNWINDER-NEXT: 7b5->7c0:1
; CHECK-UNWINDER-NEXT: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6]
; CHECK-UNWINDER-NEXT: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab @ 0x7b5 @ 0x7d4 @ 0x7e0 @ 0x7b5]
; CHECK-UNWINDER-NEXT: 2
; CHECK-UNWINDER-NEXT: 7c0-7cd:2
; CHECK-UNWINDER-NEXT: 7db-7e0:1
; CHECK-UNWINDER-NEXT: 2
; CHECK-UNWINDER-NEXT: 7cd->7db:2
; CHECK-UNWINDER-NEXT: 7e0->7a0:1
; CHECK-UNWINDER-NEXT: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7]
; CHECK-UNWINDER-NEXT: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab @ 0x7b5 @ 0x7d4 @ 0x7e0 @ 0x7b5 @ 0x7e0]
; CHECK-UNWINDER-NEXT: 2
; CHECK-UNWINDER-NEXT: 7a0-7a7:1
; CHECK-UNWINDER-NEXT: 7b2-7b5:1

View File

@ -179,17 +179,12 @@ std::shared_ptr<StringBasedCtxKey> FrameStack::getContextKey() {
return KeyStr;
}
std::shared_ptr<ProbeBasedCtxKey> ProbeStack::getContextKey() {
std::shared_ptr<ProbeBasedCtxKey> ProbeBasedKey =
std::make_shared<ProbeBasedCtxKey>();
for (auto CallProbe : Stack) {
ProbeBasedKey->Probes.emplace_back(CallProbe);
}
CSProfileGenerator::compressRecursionContext<const MCDecodedPseudoProbe *>(
ProbeBasedKey->Probes);
CSProfileGenerator::trimContext<const MCDecodedPseudoProbe *>(
ProbeBasedKey->Probes);
return ProbeBasedKey;
std::shared_ptr<AddrBasedCtxKey> AddressStack::getContextKey() {
std::shared_ptr<AddrBasedCtxKey> KeyStr = std::make_shared<AddrBasedCtxKey>();
KeyStr->Context = Stack;
CSProfileGenerator::compressRecursionContext<uint64_t>(KeyStr->Context);
CSProfileGenerator::trimContext<uint64_t>(KeyStr->Context);
return KeyStr;
}
template <typename T>
@ -252,8 +247,8 @@ void VirtualUnwinder::collectSamplesFromFrameTrie(
void VirtualUnwinder::collectSamplesFromFrameTrie(
UnwindState::ProfiledFrame *Cur) {
if (Binary->usePseudoProbes()) {
ProbeStack Stack(Binary);
collectSamplesFromFrameTrie<ProbeStack>(Cur, Stack);
AddressStack Stack(Binary);
collectSamplesFromFrameTrie<AddressStack>(Cur, Stack);
} else {
FrameStack Stack(Binary);
collectSamplesFromFrameTrie<FrameStack>(Cur, Stack);
@ -461,14 +456,17 @@ static std::string getContextKeyStr(ContextKey *K,
const ProfiledBinary *Binary) {
if (const auto *CtxKey = dyn_cast<StringBasedCtxKey>(K)) {
return SampleContext::getContextString(CtxKey->Context);
} else if (const auto *CtxKey = dyn_cast<ProbeBasedCtxKey>(K)) {
SampleContextFrameVector ContextStack;
for (const auto *Probe : CtxKey->Probes) {
Binary->getInlineContextForProbe(Probe, ContextStack, true);
} else if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(K)) {
std::ostringstream OContextStr;
for (uint32_t I = 0; I < CtxKey->Context.size(); I++) {
if (OContextStr.str().size())
OContextStr << " @ ";
OContextStr << "0x"
<< to_hexString(
Binary->virtualAddrToOffset(CtxKey->Context[I]),
false);
}
// Probe context key at this point does not have leaf probe, so do not
// include the leaf inline location.
return SampleContext::getContextString(ContextStack, true);
return OContextStr.str();
} else {
llvm_unreachable("unexpected key type");
}

View File

@ -333,7 +333,7 @@ struct ContextKey {
};
// Utilities for LLVM-style RTTI
enum ContextKind { CK_StringBased, CK_ProbeBased };
enum ContextKind { CK_StringBased, CK_AddrBased };
const ContextKind Kind;
ContextKind getKind() const { return Kind; }
ContextKey(ContextKind K) : Kind(K){};
@ -359,34 +359,23 @@ struct StringBasedCtxKey : public ContextKey {
}
};
// Probe based context key as the intermediate key of context
// String based context key will introduce redundant string handling
// since the callee context is inferred from the context string which
// need to be splitted by '@' to get the last location frame, so we
// can just use probe instead and generate the string in the end.
struct ProbeBasedCtxKey : public ContextKey {
SmallVector<const MCDecodedPseudoProbe *, 16> Probes;
// Address-based context id
struct AddrBasedCtxKey : public ContextKey {
SmallVector<uint64_t, 16> Context;
ProbeBasedCtxKey() : ContextKey(CK_ProbeBased) {}
bool WasLeafInlined;
AddrBasedCtxKey() : ContextKey(CK_AddrBased), WasLeafInlined(false){};
static bool classof(const ContextKey *K) {
return K->getKind() == CK_ProbeBased;
return K->getKind() == CK_AddrBased;
}
bool isEqual(const ContextKey *K) const override {
const ProbeBasedCtxKey *O = dyn_cast<ProbeBasedCtxKey>(K);
assert(O != nullptr && "Probe based key shouldn't be null in isEqual");
return std::equal(Probes.begin(), Probes.end(), O->Probes.begin(),
O->Probes.end());
const AddrBasedCtxKey *Other = dyn_cast<AddrBasedCtxKey>(K);
return Context == Other->Context;
}
void genHashCode() override {
for (const auto *P : Probes) {
HashCode = hash_combine(HashCode, P);
}
if (HashCode == 0) {
// Avoid zero value of HashCode when it's an empty list
HashCode = 1;
}
HashCode = hash_combine_range(Context.begin(), Context.end());
}
};
@ -433,22 +422,14 @@ struct FrameStack {
std::shared_ptr<StringBasedCtxKey> getContextKey();
};
struct ProbeStack {
SmallVector<const MCDecodedPseudoProbe *, 16> Stack;
struct AddressStack {
SmallVector<uint64_t, 16> Stack;
ProfiledBinary *Binary;
ProbeStack(ProfiledBinary *B) : Binary(B) {}
AddressStack(ProfiledBinary *B) : Binary(B) {}
bool pushFrame(UnwindState::ProfiledFrame *Cur) {
assert(!Cur->isExternalFrame() &&
"External frame's not expected for context stack.");
const MCDecodedPseudoProbe *CallProbe =
Binary->getCallProbeForAddr(Cur->Address);
// We may not find a probe for a merged or external callsite.
// Callsite merging may cause the loss of original probe IDs.
// Cutting off the context from here since the inliner will
// not know how to consume a context with unknown callsites.
if (!CallProbe)
return false;
Stack.push_back(CallProbe);
Stack.push_back(Cur->Address);
return true;
}
@ -456,18 +437,7 @@ struct ProbeStack {
if (!Stack.empty())
Stack.pop_back();
}
// Use pseudo probe based context key to get the sample counter
// A context stands for a call path from 'main' to an uninlined
// callee with all inline frames recovered on that path. The probes
// belonging to that call path is the probes either originated from
// the callee or from any functions inlined into the callee. Since
// pseudo probes are organized in a tri-tree style after decoded,
// the tree path from the tri-tree root (which is the uninlined
// callee) to the probe node forms an inline context.
// Here we use a list of probe(pointer) as the context key to speed up
// aggregation and the final context string will be generate in
// ProfileGenerator
std::shared_ptr<ProbeBasedCtxKey> getContextKey();
std::shared_ptr<AddrBasedCtxKey> getContextKey();
};
/*

View File

@ -5,12 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "ProfileGenerator.h"
#include "ErrorHandling.h"
#include "ProfiledBinary.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include <algorithm>
#include <float.h>
#include <unordered_set>
@ -370,6 +370,39 @@ void ProfileGeneratorBase::updateTotalSamples() {
}
}
// Collect every binary function that is touched by a sample and hand the
// resulting set to the ProfiledBinary via setProfiledFunctions().
//
// A function counts as profiled when it covers any of:
//   - an address in an address-based context key (a call stack frame),
//   - the start offset of a sampled range,
//   - the source or the target offset of a sampled branch.
void ProfileGeneratorBase::collectProfiledFunctions() {
  std::unordered_set<const BinaryFunction *> ProfiledFunctions;

  // Record the function whose range covers Offset, if there is one.
  auto RecordFuncForOffset = [&](uint64_t Offset) {
    if (FuncRange *FRange = Binary->findFuncRangeForOffset(Offset))
      ProfiledFunctions.insert(FRange->Func);
  };

  for (const auto &CI : SampleCounters) {
    // Context keys store virtual addresses, while function ranges are looked
    // up by offset, so convert before the lookup.
    if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr()))
      for (uint64_t Addr : CtxKey->Context)
        RecordFuncForOffset(Binary->virtualAddrToOffset(Addr));

    // Use the start of each range to find the function it belongs to.
    for (const auto &Item : CI.second.RangeCounter)
      RecordFuncForOffset(Item.first.first);

    for (const auto &Item : CI.second.BranchCounter) {
      // The key is a (source, target) pair. The target must be read from
      // .second; reading .first twice would miss functions that only ever
      // appear as a branch target.
      RecordFuncForOffset(Item.first.first);
      RecordFuncForOffset(Item.first.second);
    }
  }

  Binary->setProfiledFunctions(ProfiledFunctions);
}
FunctionSamples &
ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
SampleContext Context(FuncName);
@ -382,6 +415,7 @@ ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
}
void ProfileGenerator::generateProfile() {
collectProfiledFunctions();
if (Binary->usePseudoProbes()) {
generateProbeBasedProfile();
} else {
@ -428,6 +462,7 @@ void ProfileGenerator::generateLineNumBasedProfile() {
void ProfileGenerator::generateProbeBasedProfile() {
assert(SampleCounters.size() == 1 &&
"Must have one entry for profile generation.");
Binary->decodePseudoProbe();
// Enable pseudo probe functionalities in SampleProf
FunctionSamples::ProfileIsProbeBased = true;
const SampleCounter &SC = SampleCounters.begin()->second;
@ -442,16 +477,18 @@ void ProfileGenerator::generateProbeBasedProfile() {
void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
const RangeSample &RangeCounter) {
ProbeCounterMap ProbeCounter;
// preprocessRangeCounter returns disjoint ranges, so no longer to redo it inside
// extractProbesFromRange.
extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter, false);
// preprocessRangeCounter returns disjoint ranges, so no longer to redo it
// inside extractProbesFromRange.
extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter,
false);
for (const auto &PI : ProbeCounter) {
const MCDecodedPseudoProbe *Probe = PI.first;
uint64_t Count = PI.second;
SampleContextFrameVector FrameVec;
Binary->getInlineContextForProbe(Probe, FrameVec, true);
FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(FrameVec, Count);
FunctionSamples &FunctionProfile =
getLeafProfileAndAddTotalSamples(FrameVec, Count);
FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
if (Probe->isEntry())
FunctionProfile.addHeadSamples(Count);
@ -496,7 +533,8 @@ FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
&getTopLevelFunctionProfile(FrameVec[0].FuncName);
FunctionProfile->addTotalSamples(Count);
if (Binary->usePseudoProbes()) {
const auto *FuncDesc = Binary->getFuncDescForGUID(Function::getGUID(FunctionProfile->getName()));
const auto *FuncDesc = Binary->getFuncDescForGUID(
Function::getGUID(FunctionProfile->getName()));
FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
}
@ -515,7 +553,8 @@ FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
FunctionProfile = &Ret.first->second;
FunctionProfile->addTotalSamples(Count);
if (Binary->usePseudoProbes()) {
const auto *FuncDesc = Binary->getFuncDescForGUID(Function::getGUID(FunctionProfile->getName()));
const auto *FuncDesc = Binary->getFuncDescForGUID(
Function::getGUID(FunctionProfile->getName()));
FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
}
}
@ -646,32 +685,23 @@ FunctionSamples &CSProfileGenerator::getFunctionProfileForContext(
void CSProfileGenerator::generateProfile() {
FunctionSamples::ProfileIsCSFlat = true;
if (Binary->getTrackFuncContextSize())
computeSizeForProfiledFunctions();
collectProfiledFunctions();
if (Binary->usePseudoProbes()) {
generateProbeBasedProfile();
} else {
generateLineNumBasedProfile();
}
if (Binary->getTrackFuncContextSize())
computeSizeForProfiledFunctions();
postProcessProfiles();
}
void CSProfileGenerator::computeSizeForProfiledFunctions() {
std::unordered_set<const BinaryFunction *> ProfiledFunctions;
// Go through all the ranges in the CS counters, use the start of the range to
// look up the function it belongs and record the function.
for (const auto &CI : SampleCounters) {
for (const auto &Item : CI.second.RangeCounter) {
// FIXME: Filter the bogus crossing function range.
uint64_t StartOffset = Item.first.first;
if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset))
ProfiledFunctions.insert(FRange->Func);
}
}
for (auto *Func : ProfiledFunctions)
for (auto *Func : Binary->getProfiledFunctions())
Binary->computeInlinedContextSizeForFunc(Func);
// Flush the symbolizer to save memory.
@ -907,25 +937,44 @@ void ProfileGeneratorBase::extractProbesFromRange(
}
}
// Helper function to extract context prefix string stack
// Extract context stack for reusing, leaf context stack will
// be added compressed while looking up function profile
static void extractPrefixContextStack(
SampleContextFrameVector &ContextStack,
const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes,
// Helper to turn an address-based calling context into the inline-expanded
// frame stack that precedes the leaf function.
static void
extractPrefixContextStack(SampleContextFrameVector &ContextStack,
                          const SmallVectorImpl<uint64_t> &Addresses,
                          ProfiledBinary *Binary) {
  // Walk from the innermost caller outwards and stop at the first address
  // that has no call probe. A probe may be missing at a callsite:
  // 1. for functions that are not sampled when
  //    --decode-probe-for-profiled-functions-only is on.
  // 2. for a merged callsite. Callsite merging may cause the loss of original
  //    probe IDs.
  // 3. for an external callsite.
  // The context is cut off there since the inliner will not know how to
  // consume a context with unknown callsites.
  SmallVector<const MCDecodedPseudoProbe *, 16> CallProbes;
  for (auto It = Addresses.rbegin(), ItEnd = Addresses.rend(); It != ItEnd;
       ++It) {
    const MCDecodedPseudoProbe *CallProbe = Binary->getCallProbeForAddr(*It);
    if (!CallProbe)
      break;
    CallProbes.push_back(CallProbe);
  }

  // CallProbes is ordered innermost-first; visit it backwards so frames are
  // appended outermost-first. The leaf context stack is added later, while
  // looking up the function profile.
  for (const MCDecodedPseudoProbe *Probe : reverse(CallProbes))
    Binary->getInlineContextForProbe(Probe, ContextStack, true);
}
void CSProfileGenerator::generateProbeBasedProfile() {
Binary->decodePseudoProbe();
// Enable pseudo probe functionalities in SampleProf
FunctionSamples::ProfileIsProbeBased = true;
for (const auto &CI : SampleCounters) {
const auto *CtxKey = cast<ProbeBasedCtxKey>(CI.first.getPtr());
const AddrBasedCtxKey *CtxKey =
dyn_cast<AddrBasedCtxKey>(CI.first.getPtr());
SampleContextFrameVector ContextStack;
extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary);
extractPrefixContextStack(ContextStack, CtxKey->Context, Binary);
// Fill in function body samples from probes, also infer caller's samples
// from callee's probe
populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);

View File

@ -106,6 +106,8 @@ protected:
void showDensitySuggestion(double Density);
void collectProfiledFunctions();
// Thresholds from profile summary to answer isHotCount/isColdCount queries.
uint64_t HotCountThreshold;

View File

@ -156,7 +156,8 @@ void BinarySizeContextTracker::trackInlineesOptimizedAway(
for (const auto &ChildNode : ProbeNode.getChildren()) {
InlineSite Location = ChildNode.first;
ProbeContext.back().second = std::get<1>(Location);
trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second.get(), ProbeContext);
trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second.get(),
ProbeContext);
}
ProbeContext.pop_back();
@ -208,7 +209,9 @@ void ProfiledBinary::load() {
// Find the preferred load address for text sections.
setPreferredTextSegmentAddresses(Obj);
// Decode pseudo probe related section
checkPseudoProbe(Obj);
if (ShowDisassemblyOnly)
decodePseudoProbe(Obj);
// Load debug info of subprograms from DWARF section.
@ -287,7 +290,8 @@ ProfiledBinary::getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
}
template <class ELFT>
void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj, StringRef FileName) {
void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj,
StringRef FileName) {
const auto &PhdrRange = unwrapOrError(Obj.program_headers(), FileName);
// FIXME: This should be the page size of the system running profiling.
// However such info isn't available at post-processing time, assuming
@ -311,7 +315,8 @@ void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj,
exitWithError("no executable segment found", FileName);
}
void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFObjectFileBase *Obj) {
void ProfiledBinary::setPreferredTextSegmentAddresses(
const ELFObjectFileBase *Obj) {
if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
@ -324,10 +329,38 @@ void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFObjectFileBase *O
llvm_unreachable("invalid ELF object format");
}
void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
void ProfiledBinary::checkPseudoProbe(const ELFObjectFileBase *Obj) {
if (UseDwarfCorrelation)
return;
bool HasProbeDescSection = false;
bool HasPseudoProbeSection = false;
StringRef FileName = Obj->getFileName();
for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
SI != SE; ++SI) {
const SectionRef &Section = *SI;
StringRef SectionName = unwrapOrError(Section.getName(), FileName);
if (SectionName == ".pseudo_probe_desc") {
HasProbeDescSection = true;
} else if (SectionName == ".pseudo_probe") {
HasPseudoProbeSection = true;
}
}
// set UsePseudoProbes flag, used for PerfReader
UsePseudoProbes = HasProbeDescSection && HasPseudoProbeSection;
}
void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
if (!UsePseudoProbes)
return;
std::unordered_set<uint64_t> ProfiledGuids;
if (!ShowDisassemblyOnly)
for (auto *F : ProfiledFunctions)
ProfiledGuids.insert(Function::getGUID(F->FuncName));
StringRef FileName = Obj->getFileName();
for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
SI != SE; ++SI) {
@ -339,21 +372,20 @@ void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
if (!ProbeDecoder.buildGUID2FuncDescMap(
reinterpret_cast<const uint8_t *>(Contents.data()),
Contents.size()))
exitWithError("Pseudo Probe decoder fail in .pseudo_probe_desc section");
exitWithError(
"Pseudo Probe decoder fail in .pseudo_probe_desc section");
} else if (SectionName == ".pseudo_probe") {
StringRef Contents = unwrapOrError(Section.getContents(), FileName);
if (!ProbeDecoder.buildAddress2ProbeMap(
reinterpret_cast<const uint8_t *>(Contents.data()),
Contents.size()))
Contents.size(), ProfiledGuids))
exitWithError("Pseudo Probe decoder fail in .pseudo_probe section");
// set UsePseudoProbes flag, used for PerfReader
UsePseudoProbes = true;
}
}
// Build TopLevelProbeFrameMap to track size for optimized inlinees when probe
// is available
if (UsePseudoProbes && TrackFuncContextSize) {
if (TrackFuncContextSize) {
for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) {
auto *Frame = Child.second.get();
StringRef FuncName =
@ -366,6 +398,13 @@ void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
ProbeDecoder.printGUID2FuncDescMap(outs());
}
void ProfiledBinary::decodePseudoProbe() {
OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
Binary &ExeBinary = *OBinary.getBinary();
auto *Obj = dyn_cast<ELFObjectFileBase>(&ExeBinary);
decodePseudoProbe(Obj);
}
void ProfiledBinary::setIsFuncEntry(uint64_t Offset, StringRef RangeSymName) {
// Note that the start offset of each ELF section can be a non-function
// symbol, we need to binary search for the start of a real function range.

View File

@ -218,6 +218,9 @@ class ProfiledBinary {
// A map of mapping function name to BinaryFunction info.
std::unordered_map<std::string, BinaryFunction> BinaryFunctions;
// A list of binary functions that have samples.
std::unordered_set<const BinaryFunction *> ProfiledFunctions;
// An ordered map of mapping function's start offset to function range
// relevant info. Currently to determine if the offset of ELF is the start of
// a real function, we leverage the function range info from DWARF.
@ -278,6 +281,8 @@ class ProfiledBinary {
template <class ELFT>
void setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj, StringRef FileName);
void checkPseudoProbe(const ELFObjectFileBase *Obj);
void decodePseudoProbe(const ELFObjectFileBase *Obj);
void
@ -331,6 +336,9 @@ public:
setupSymbolizer();
load();
}
void decodePseudoProbe();
// Convert a virtual address into an offset by subtracting BaseAddress.
uint64_t virtualAddrToOffset(uint64_t VirtualAddress) const {
return VirtualAddress - BaseAddress;
}
@ -453,6 +461,14 @@ public:
return BinaryFunctions;
}
// Return a mutable reference to the set of binary functions that have
// samples.
std::unordered_set<const BinaryFunction *> &getProfiledFunctions() {
return ProfiledFunctions;
}
void setProfiledFunctions(std::unordered_set<const BinaryFunction *> &Funcs) {
ProfiledFunctions = Funcs;
}
BinaryFunction *getBinaryFunction(StringRef FName) {
auto I = BinaryFunctions.find(FName.str());
if (I == BinaryFunctions.end())