[llvm-profgen] Decoding pseudo probe for profiled function only.
Complete pseudo probe decoding can result in large memory usage. In practice, only a small portion of the decoded probes is used in profile generation. I'm changing the full decoding mode to decode for profiled functions only. We still do a full scan of the .pseudo_probe section because there is no table of contents, but we no longer build the in-memory data structures for functions that were not sampled.

To build the in-memory data structure for profiled functions only, I'm rewriting the previous non-recursive probe decoding logic to be recursive, which is easier to read and maintain. I also have to change the previous representation of unsymbolized context from a probe-based stack to an address-based stack, since the profiled functions are not yet known at the time of virtual unwinding. The address-based stack is converted to a probe-based stack after virtual unwinding and on-demand probe decoding.

I'm seeing 20GB of memory saved for one of our internal large services.

Reviewed By: wenlei

Differential Revision: https://reviews.llvm.org/D121643
commit 3f97016857 (parent d90a3fcacd)
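For readers who want the core idea outside the diff, here is a minimal, self-contained sketch of the GUID-filtered recursive decoding described above. It is not the actual MCPseudoProbeDecoder API or the real .pseudo_probe encoding: ProbeNode, Cursor, decodeNode, decodeSection, and the fixed-width wire format are invented stand-ins for illustration only.

#include <cstdint>
#include <cstring>
#include <iostream>
#include <memory>
#include <unordered_set>
#include <vector>

struct ProbeNode {
  uint64_t Guid = 0;
  std::vector<std::unique_ptr<ProbeNode>> Children;
};

struct Cursor {
  const uint8_t *Data;
  const uint8_t *End;
  template <typename T> T read() {
    T V;
    std::memcpy(&V, Data, sizeof(T));
    Data += sizeof(T);
    return V;
  }
};

// Decode one tree of the inline forest depth-first. A null Parent means
// "keep the stream in sync but do not materialize nodes", which is what
// happens to functions that were never sampled.
void decodeNode(Cursor &C, ProbeNode *Parent, bool IsTopLevel,
                const std::unordered_set<uint64_t> &GuidFilter) {
  uint64_t Guid = C.read<uint64_t>();
  ProbeNode *Cur = nullptr;
  // Top-level functions are kept only if the filter is empty or contains
  // their GUID; inlinees inherit their parent's decision via Parent.
  bool Keep =
      Parent && (!IsTopLevel || GuidFilter.empty() || GuidFilter.count(Guid));
  if (Keep) {
    Parent->Children.push_back(std::make_unique<ProbeNode>());
    Cur = Parent->Children.back().get();
    Cur->Guid = Guid;
  }
  uint32_t NumProbes = C.read<uint32_t>();
  C.Data += NumProbes; // pretend each probe record is a single byte
  uint32_t NumInlinees = C.read<uint32_t>();
  for (uint32_t I = 0; I < NumInlinees; ++I)
    decodeNode(C, Cur, /*IsTopLevel=*/false, GuidFilter);
}

// Full scan of the payload (there is no table of contents), materializing
// in-memory nodes only for filtered functions under Root.
void decodeSection(Cursor C, ProbeNode &Root,
                   const std::unordered_set<uint64_t> &GuidFilter) {
  while (C.Data < C.End)
    decodeNode(C, &Root, /*IsTopLevel=*/true, GuidFilter);
}

int main() {
  // Encode two top-level functions (GUIDs 1 and 2), each with two one-byte
  // probe records and no inlinees, then decode keeping only GUID 2.
  std::vector<uint8_t> Buf;
  auto Emit = [&](auto V) {
    const uint8_t *P = reinterpret_cast<const uint8_t *>(&V);
    Buf.insert(Buf.end(), P, P + sizeof(V));
  };
  for (uint64_t Guid : {1, 2}) {
    Emit(Guid);
    Emit(uint32_t(2));
    Buf.push_back(0);
    Buf.push_back(0);
    Emit(uint32_t(0));
  }
  ProbeNode Root;
  decodeSection({Buf.data(), Buf.data() + Buf.size()}, Root,
                std::unordered_set<uint64_t>{2});
  std::cout << "kept " << Root.Children.size() << " top-level function(s)\n";
  return 0;
}

Compiled and run, this keeps only the top-level tree whose GUID is in the filter while still walking the whole encoded payload end to end, which is why the change saves memory even without a table of contents for the section.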
@@ -55,6 +55,7 @@
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>

namespace llvm {

@@ -353,6 +354,15 @@ public:
// Decode pseudo_probe section to build address to probes map.
bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size);

// Decode pseudo_probe section to build address to probes map for specifed
// functions only.
bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size,
std::unordered_set<uint64_t> &GuildFilter);

bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur,
uint64_t &LastAddr,
std::unordered_set<uint64_t> &GuildFilter);

// Print pseudo_probe_desc section info
void printGUID2FuncDescMap(raw_ostream &OS);
@@ -358,8 +358,9 @@ bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start,
return true;
}

bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
std::size_t Size) {
bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
MCDecodedPseudoProbeInlineTree *Cur, uint64_t &LastAddr,
std::unordered_set<uint64_t> &GuildFilter) {
// The pseudo_probe section encodes an inline forest and each tree has a
// format like:
// FUNCTION BODY (one for each uninlined function present in the text

@@ -390,18 +391,10 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
// FUNCTION BODY
// A FUNCTION BODY entry describing the inlined function.

Data = Start;
End = Data + Size;

MCDecodedPseudoProbeInlineTree *Root = &DummyInlineRoot;
MCDecodedPseudoProbeInlineTree *Cur = &DummyInlineRoot;
uint64_t LastAddr = 0;
uint32_t Index = 0;
// A DFS-based decoding
while (Data < End) {
if (Root == Cur) {
if (Cur == &DummyInlineRoot) {
// Use a sequential id for top level inliner.
Index = Root->getChildren().size();
Index = Cur->getChildren().size();
} else {
// Read inline site for inlinees
auto ErrorOrIndex = readUnsignedNumber<uint32_t>();

@@ -409,13 +402,25 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
return false;
Index = std::move(*ErrorOrIndex);
}
// Switch/add to a new tree node(inlinee)
Cur = Cur->getOrAddNode(std::make_tuple(Cur->Guid, Index));

// Read guid
auto ErrorOrCurGuid = readUnencodedNumber<uint64_t>();
if (!ErrorOrCurGuid)
return false;
Cur->Guid = std::move(*ErrorOrCurGuid);
uint64_t Guid = std::move(*ErrorOrCurGuid);

// Decide if top-level node should be disgarded.
if (Cur == &DummyInlineRoot && !GuildFilter.empty() &&
!GuildFilter.count(Guid))
Cur = nullptr;

// If the incoming node is null, all its children nodes should be disgarded.
if (Cur) {
// Switch/add to a new tree node(inlinee)
Cur = Cur->getOrAddNode(std::make_tuple(Cur->Guid, Index));
Cur->Guid = Guid;
}

// Read number of probes in the current node.
auto ErrorOrNodeCount = readUnsignedNumber<uint32_t>();
if (!ErrorOrNodeCount)

@@ -425,7 +430,6 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
auto ErrorOrCurChildrenToProcess = readUnsignedNumber<uint32_t>();
if (!ErrorOrCurChildrenToProcess)
return false;
Cur->ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess);
// Read all probes in this node
for (std::size_t I = 0; I < NodeCount; I++) {
// Read index

@@ -454,37 +458,43 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
return false;
Addr = std::move(*ErrorOrAddr);
}

if (Cur) {
// Populate Address2ProbesMap
auto &Probes = Address2ProbesMap[Addr];
Probes.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind), Attr,
Cur);
Cur->addProbes(&Probes.back());
}
LastAddr = Addr;
}

// Look for the parent for the next node by subtracting the current
// node count from tree counts along the parent chain. The first node
// in the chain that has a non-zero tree count is the target.
while (Cur != Root) {
if (Cur->ChildrenToProcess == 0) {
Cur = static_cast<MCDecodedPseudoProbeInlineTree *>(Cur->Parent);
if (Cur != Root) {
assert(Cur->ChildrenToProcess > 0 &&
"Should have some unprocessed nodes");
Cur->ChildrenToProcess -= 1;
}
} else {
break;
}
}
uint32_t ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess);
for (uint32_t I = 0; I < ChildrenToProcess; I++) {
buildAddress2ProbeMap(Cur, LastAddr, GuildFilter);
}

assert(Data == End && "Have unprocessed data in pseudo_probe section");
assert(Cur == Root &&
" Cur should point to root when the forest is fully built up");
return true;
}

bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
const uint8_t *Start, std::size_t Size,
std::unordered_set<uint64_t> &GuildFilter) {
Data = Start;
End = Data + Size;
uint64_t LastAddr = 0;
while (Data < End)
buildAddress2ProbeMap(&DummyInlineRoot, LastAddr, GuildFilter);
assert(Data == End && "Have unprocessed data in pseudo_probe section");
return true;
}

bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
std::size_t Size) {
std::unordered_set<uint64_t> GuildFilter;
return buildAddress2ProbeMap(Start, Size, GuildFilter);
}

void MCPseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) {
OS << "Pseudo Probe Desc:\n";
// Make the output deterministic
@@ -24,14 +24,14 @@
; CHECK-NEXT: 4: 15
; CHECK-NEXT: !CFGChecksum: 72617220756

; CHECK-UNWINDER: [main:2]
; CHECK-UNWINDER: [0x7f4]
; CHECK-UNWINDER-NEXT: 2
; CHECK-UNWINDER-NEXT: 79e-7bf:15
; CHECK-UNWINDER-NEXT: 7c4-7cf:15
; CHECK-UNWINDER-NEXT: 2
; CHECK-UNWINDER-NEXT: 7bf->760:15
; CHECK-UNWINDER-NEXT: 7cf->79e:16
; CHECK-UNWINDER-NEXT: [main:2 @ foo:8]
; CHECK-UNWINDER-NEXT: [0x7f4 @ 0x7bf]
; CHECK-UNWINDER-NEXT: 1
; CHECK-UNWINDER-NEXT: 760-77f:15
; CHECK-UNWINDER-NEXT: 1
@@ -123,7 +123,7 @@
; CHECK: 6: 1 fa:1
; CHECK: !CFGChecksum: 563022570642068

; CHECK-UNWINDER: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5]
; CHECK-UNWINDER: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab]
; CHECK-UNWINDER-NEXT: 3
; CHECK-UNWINDER-NEXT: 7a0-7a7:1
; CHECK-UNWINDER-NEXT: 7a0-7ab:3

@@ -132,33 +132,33 @@
; CHECK-UNWINDER-NEXT: 7a7->7b2:1
; CHECK-UNWINDER-NEXT: 7ab->7a0:4
; CHECK-UNWINDER-NEXT: 7b5->7c0:1
; CHECK-UNWINDER-NEXT: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6]
; CHECK-UNWINDER-NEXT: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab @ 0x7b5]
; CHECK-UNWINDER-NEXT: 1
; CHECK-UNWINDER-NEXT: 7c0-7d4:1
; CHECK-UNWINDER-NEXT: 1
; CHECK-UNWINDER-NEXT: 7d4->7c0:1
; CHECK-UNWINDER-NEXT: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8]
; CHECK-UNWINDER-NEXT: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab @ 0x7b5 @ 0x7d4]
; CHECK-UNWINDER-NEXT: 2
; CHECK-UNWINDER-NEXT: 7c0-7cd:1
; CHECK-UNWINDER-NEXT: 7db-7e0:1
; CHECK-UNWINDER-NEXT: 2
; CHECK-UNWINDER-NEXT: 7cd->7db:1
; CHECK-UNWINDER-NEXT: 7e0->7a0:1
; CHECK-UNWINDER-NEXT: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7]
; CHECK-UNWINDER-NEXT: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab @ 0x7b5 @ 0x7d4 @ 0x7e0]
; CHECK-UNWINDER-NEXT: 2
; CHECK-UNWINDER-NEXT: 7a0-7a7:1
; CHECK-UNWINDER-NEXT: 7b2-7b5:1
; CHECK-UNWINDER-NEXT: 2
; CHECK-UNWINDER-NEXT: 7a7->7b2:1
; CHECK-UNWINDER-NEXT: 7b5->7c0:1
; CHECK-UNWINDER-NEXT: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6]
; CHECK-UNWINDER-NEXT: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab @ 0x7b5 @ 0x7d4 @ 0x7e0 @ 0x7b5]
; CHECK-UNWINDER-NEXT: 2
; CHECK-UNWINDER-NEXT: 7c0-7cd:2
; CHECK-UNWINDER-NEXT: 7db-7e0:1
; CHECK-UNWINDER-NEXT: 2
; CHECK-UNWINDER-NEXT: 7cd->7db:2
; CHECK-UNWINDER-NEXT: 7e0->7a0:1
; CHECK-UNWINDER-NEXT: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7]
; CHECK-UNWINDER-NEXT: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab @ 0x7b5 @ 0x7d4 @ 0x7e0 @ 0x7b5 @ 0x7e0]
; CHECK-UNWINDER-NEXT: 2
; CHECK-UNWINDER-NEXT: 7a0-7a7:1
; CHECK-UNWINDER-NEXT: 7b2-7b5:1
@@ -179,17 +179,12 @@ std::shared_ptr<StringBasedCtxKey> FrameStack::getContextKey() {
return KeyStr;
}

std::shared_ptr<ProbeBasedCtxKey> ProbeStack::getContextKey() {
std::shared_ptr<ProbeBasedCtxKey> ProbeBasedKey =
std::make_shared<ProbeBasedCtxKey>();
for (auto CallProbe : Stack) {
ProbeBasedKey->Probes.emplace_back(CallProbe);
}
CSProfileGenerator::compressRecursionContext<const MCDecodedPseudoProbe *>(
ProbeBasedKey->Probes);
CSProfileGenerator::trimContext<const MCDecodedPseudoProbe *>(
ProbeBasedKey->Probes);
return ProbeBasedKey;
std::shared_ptr<AddrBasedCtxKey> AddressStack::getContextKey() {
std::shared_ptr<AddrBasedCtxKey> KeyStr = std::make_shared<AddrBasedCtxKey>();
KeyStr->Context = Stack;
CSProfileGenerator::compressRecursionContext<uint64_t>(KeyStr->Context);
CSProfileGenerator::trimContext<uint64_t>(KeyStr->Context);
return KeyStr;
}

template <typename T>

@@ -252,8 +247,8 @@ void VirtualUnwinder::collectSamplesFromFrameTrie(
void VirtualUnwinder::collectSamplesFromFrameTrie(
UnwindState::ProfiledFrame *Cur) {
if (Binary->usePseudoProbes()) {
ProbeStack Stack(Binary);
collectSamplesFromFrameTrie<ProbeStack>(Cur, Stack);
AddressStack Stack(Binary);
collectSamplesFromFrameTrie<AddressStack>(Cur, Stack);
} else {
FrameStack Stack(Binary);
collectSamplesFromFrameTrie<FrameStack>(Cur, Stack);

@@ -461,14 +456,17 @@ static std::string getContextKeyStr(ContextKey *K,
const ProfiledBinary *Binary) {
if (const auto *CtxKey = dyn_cast<StringBasedCtxKey>(K)) {
return SampleContext::getContextString(CtxKey->Context);
} else if (const auto *CtxKey = dyn_cast<ProbeBasedCtxKey>(K)) {
SampleContextFrameVector ContextStack;
for (const auto *Probe : CtxKey->Probes) {
Binary->getInlineContextForProbe(Probe, ContextStack, true);
} else if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(K)) {
std::ostringstream OContextStr;
for (uint32_t I = 0; I < CtxKey->Context.size(); I++) {
if (OContextStr.str().size())
OContextStr << " @ ";
OContextStr << "0x"
<< to_hexString(
Binary->virtualAddrToOffset(CtxKey->Context[I]),
false);
}
// Probe context key at this point does not have leaf probe, so do not
// include the leaf inline location.
return SampleContext::getContextString(ContextStack, true);
return OContextStr.str();
} else {
llvm_unreachable("unexpected key type");
}
@@ -333,7 +333,7 @@ struct ContextKey {
};

// Utilities for LLVM-style RTTI
enum ContextKind { CK_StringBased, CK_ProbeBased };
enum ContextKind { CK_StringBased, CK_AddrBased };
const ContextKind Kind;
ContextKind getKind() const { return Kind; }
ContextKey(ContextKind K) : Kind(K){};

@@ -359,34 +359,23 @@ struct StringBasedCtxKey : public ContextKey {
}
};

// Probe based context key as the intermediate key of context
// String based context key will introduce redundant string handling
// since the callee context is inferred from the context string which
// need to be splitted by '@' to get the last location frame, so we
// can just use probe instead and generate the string in the end.
struct ProbeBasedCtxKey : public ContextKey {
SmallVector<const MCDecodedPseudoProbe *, 16> Probes;
// Address-based context id
struct AddrBasedCtxKey : public ContextKey {
SmallVector<uint64_t, 16> Context;

ProbeBasedCtxKey() : ContextKey(CK_ProbeBased) {}
bool WasLeafInlined;
AddrBasedCtxKey() : ContextKey(CK_AddrBased), WasLeafInlined(false){};
static bool classof(const ContextKey *K) {
return K->getKind() == CK_ProbeBased;
return K->getKind() == CK_AddrBased;
}

bool isEqual(const ContextKey *K) const override {
const ProbeBasedCtxKey *O = dyn_cast<ProbeBasedCtxKey>(K);
assert(O != nullptr && "Probe based key shouldn't be null in isEqual");
return std::equal(Probes.begin(), Probes.end(), O->Probes.begin(),
O->Probes.end());
const AddrBasedCtxKey *Other = dyn_cast<AddrBasedCtxKey>(K);
return Context == Other->Context;
}

void genHashCode() override {
for (const auto *P : Probes) {
HashCode = hash_combine(HashCode, P);
}
if (HashCode == 0) {
// Avoid zero value of HashCode when it's an empty list
HashCode = 1;
}
HashCode = hash_combine_range(Context.begin(), Context.end());
}
};

@@ -433,22 +422,14 @@ struct FrameStack {
std::shared_ptr<StringBasedCtxKey> getContextKey();
};

struct ProbeStack {
SmallVector<const MCDecodedPseudoProbe *, 16> Stack;
struct AddressStack {
SmallVector<uint64_t, 16> Stack;
ProfiledBinary *Binary;
ProbeStack(ProfiledBinary *B) : Binary(B) {}
AddressStack(ProfiledBinary *B) : Binary(B) {}
bool pushFrame(UnwindState::ProfiledFrame *Cur) {
assert(!Cur->isExternalFrame() &&
"External frame's not expected for context stack.");
const MCDecodedPseudoProbe *CallProbe =
Binary->getCallProbeForAddr(Cur->Address);
// We may not find a probe for a merged or external callsite.
// Callsite merging may cause the loss of original probe IDs.
// Cutting off the context from here since the inliner will
// not know how to consume a context with unknown callsites.
if (!CallProbe)
return false;
Stack.push_back(CallProbe);
Stack.push_back(Cur->Address);
return true;
}

@@ -456,18 +437,7 @@ struct ProbeStack {
if (!Stack.empty())
Stack.pop_back();
}
// Use pseudo probe based context key to get the sample counter
// A context stands for a call path from 'main' to an uninlined
// callee with all inline frames recovered on that path. The probes
// belonging to that call path is the probes either originated from
// the callee or from any functions inlined into the callee. Since
// pseudo probes are organized in a tri-tree style after decoded,
// the tree path from the tri-tree root (which is the uninlined
// callee) to the probe node forms an inline context.
// Here we use a list of probe(pointer) as the context key to speed up
// aggregation and the final context string will be generate in
// ProfileGenerator
std::shared_ptr<ProbeBasedCtxKey> getContextKey();
std::shared_ptr<AddrBasedCtxKey> getContextKey();
};

/*
@@ -5,12 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ProfileGenerator.h"
#include "ErrorHandling.h"
#include "ProfiledBinary.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include <algorithm>
#include <float.h>
#include <unordered_set>

@@ -370,6 +370,39 @@ void ProfileGeneratorBase::updateTotalSamples() {
}
}

void ProfileGeneratorBase::collectProfiledFunctions() {
std::unordered_set<const BinaryFunction *> ProfiledFunctions;
// Go through all the stacks, ranges and branches in sample counters, use the
// start of the range to look up the function it belongs and record the
// function.
for (const auto &CI : SampleCounters) {
if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
for (auto Addr : CtxKey->Context) {
if (FuncRange *FRange = Binary->findFuncRangeForOffset(
Binary->virtualAddrToOffset(Addr)))
ProfiledFunctions.insert(FRange->Func);
}
}

for (auto Item : CI.second.RangeCounter) {
uint64_t StartOffset = Item.first.first;
if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset))
ProfiledFunctions.insert(FRange->Func);
}

for (auto Item : CI.second.BranchCounter) {
uint64_t SourceOffset = Item.first.first;
uint64_t TargetOffset = Item.first.first;
if (FuncRange *FRange = Binary->findFuncRangeForOffset(SourceOffset))
ProfiledFunctions.insert(FRange->Func);
if (FuncRange *FRange = Binary->findFuncRangeForOffset(TargetOffset))
ProfiledFunctions.insert(FRange->Func);
}
}

Binary->setProfiledFunctions(ProfiledFunctions);
}

FunctionSamples &
ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
SampleContext Context(FuncName);

@@ -382,6 +415,7 @@ ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
}

void ProfileGenerator::generateProfile() {
collectProfiledFunctions();
if (Binary->usePseudoProbes()) {
generateProbeBasedProfile();
} else {

@@ -428,6 +462,7 @@ void ProfileGenerator::generateLineNumBasedProfile() {
void ProfileGenerator::generateProbeBasedProfile() {
assert(SampleCounters.size() == 1 &&
"Must have one entry for profile generation.");
Binary->decodePseudoProbe();
// Enable pseudo probe functionalities in SampleProf
FunctionSamples::ProfileIsProbeBased = true;
const SampleCounter &SC = SampleCounters.begin()->second;

@@ -442,16 +477,18 @@ void ProfileGenerator::generateProbeBasedProfile() {
void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
const RangeSample &RangeCounter) {
ProbeCounterMap ProbeCounter;
// preprocessRangeCounter returns disjoint ranges, so no longer to redo it inside
// extractProbesFromRange.
extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter, false);
// preprocessRangeCounter returns disjoint ranges, so no longer to redo it
// inside extractProbesFromRange.
extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter,
false);

for (const auto &PI : ProbeCounter) {
const MCDecodedPseudoProbe *Probe = PI.first;
uint64_t Count = PI.second;
SampleContextFrameVector FrameVec;
Binary->getInlineContextForProbe(Probe, FrameVec, true);
FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(FrameVec, Count);
FunctionSamples &FunctionProfile =
getLeafProfileAndAddTotalSamples(FrameVec, Count);
FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
if (Probe->isEntry())
FunctionProfile.addHeadSamples(Count);

@@ -496,7 +533,8 @@ FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
&getTopLevelFunctionProfile(FrameVec[0].FuncName);
FunctionProfile->addTotalSamples(Count);
if (Binary->usePseudoProbes()) {
const auto *FuncDesc = Binary->getFuncDescForGUID(Function::getGUID(FunctionProfile->getName()));
const auto *FuncDesc = Binary->getFuncDescForGUID(
Function::getGUID(FunctionProfile->getName()));
FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
}

@@ -515,7 +553,8 @@ FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
FunctionProfile = &Ret.first->second;
FunctionProfile->addTotalSamples(Count);
if (Binary->usePseudoProbes()) {
const auto *FuncDesc = Binary->getFuncDescForGUID(Function::getGUID(FunctionProfile->getName()));
const auto *FuncDesc = Binary->getFuncDescForGUID(
Function::getGUID(FunctionProfile->getName()));
FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
}
}

@@ -646,32 +685,23 @@ FunctionSamples &CSProfileGenerator::getFunctionProfileForContext(
void CSProfileGenerator::generateProfile() {
FunctionSamples::ProfileIsCSFlat = true;

if (Binary->getTrackFuncContextSize())
computeSizeForProfiledFunctions();
collectProfiledFunctions();

if (Binary->usePseudoProbes()) {
generateProbeBasedProfile();
} else {
generateLineNumBasedProfile();
}

if (Binary->getTrackFuncContextSize())
computeSizeForProfiledFunctions();

postProcessProfiles();
}

void CSProfileGenerator::computeSizeForProfiledFunctions() {
std::unordered_set<const BinaryFunction *> ProfiledFunctions;

// Go through all the ranges in the CS counters, use the start of the range to
// look up the function it belongs and record the function.
for (const auto &CI : SampleCounters) {
for (const auto &Item : CI.second.RangeCounter) {
// FIXME: Filter the bogus crossing function range.
uint64_t StartOffset = Item.first.first;
if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset))
ProfiledFunctions.insert(FRange->Func);
}
}

for (auto *Func : ProfiledFunctions)
for (auto *Func : Binary->getProfiledFunctions())
Binary->computeInlinedContextSizeForFunc(Func);

// Flush the symbolizer to save memory.

@@ -907,25 +937,44 @@ void ProfileGeneratorBase::extractProbesFromRange(
}
}

// Helper function to extract context prefix string stack
// Extract context stack for reusing, leaf context stack will
// be added compressed while looking up function profile
static void extractPrefixContextStack(
SampleContextFrameVector &ContextStack,
const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes,
static void
extractPrefixContextStack(SampleContextFrameVector &ContextStack,
const SmallVectorImpl<uint64_t> &Addresses,
ProfiledBinary *Binary) {
SmallVector<const MCDecodedPseudoProbe *, 16> Probes;
for (auto Addr : reverse(Addresses)) {
const MCDecodedPseudoProbe *CallProbe = Binary->getCallProbeForAddr(Addr);
// These could be the cases when a probe is not found at a calliste. Cutting
// off the context from here since the inliner will not know how to consume
// a context with unknown callsites.
// 1. for functions that are not sampled when
// --decode-probe-for-profiled-functions-only is on.
// 2. for a merged callsite. Callsite merging may cause the loss of original
// probe IDs.
// 3. for an external callsite.
if (!CallProbe)
break;
Probes.push_back(CallProbe);
}

std::reverse(Probes.begin(), Probes.end());

// Extract context stack for reusing, leaf context stack will be added
// compressed while looking up function profile.
for (const auto *P : Probes) {
Binary->getInlineContextForProbe(P, ContextStack, true);
}
}

void CSProfileGenerator::generateProbeBasedProfile() {
Binary->decodePseudoProbe();
// Enable pseudo probe functionalities in SampleProf
FunctionSamples::ProfileIsProbeBased = true;
for (const auto &CI : SampleCounters) {
const auto *CtxKey = cast<ProbeBasedCtxKey>(CI.first.getPtr());
const AddrBasedCtxKey *CtxKey =
dyn_cast<AddrBasedCtxKey>(CI.first.getPtr());
SampleContextFrameVector ContextStack;
extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary);
extractPrefixContextStack(ContextStack, CtxKey->Context, Binary);
// Fill in function body samples from probes, also infer caller's samples
// from callee's probe
populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
@@ -106,6 +106,8 @@ protected:

void showDensitySuggestion(double Density);

void collectProfiledFunctions();

// Thresholds from profile summary to answer isHotCount/isColdCount queries.
uint64_t HotCountThreshold;
@@ -156,7 +156,8 @@ void BinarySizeContextTracker::trackInlineesOptimizedAway(
for (const auto &ChildNode : ProbeNode.getChildren()) {
InlineSite Location = ChildNode.first;
ProbeContext.back().second = std::get<1>(Location);
trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second.get(), ProbeContext);
trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second.get(),
ProbeContext);
}

ProbeContext.pop_back();

@@ -208,7 +209,9 @@ void ProfiledBinary::load() {
// Find the preferred load address for text sections.
setPreferredTextSegmentAddresses(Obj);

// Decode pseudo probe related section
checkPseudoProbe(Obj);

if (ShowDisassemblyOnly)
decodePseudoProbe(Obj);

// Load debug info of subprograms from DWARF section.

@@ -287,7 +290,8 @@ ProfiledBinary::getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
}

template <class ELFT>
void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj, StringRef FileName) {
void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj,
StringRef FileName) {
const auto &PhdrRange = unwrapOrError(Obj.program_headers(), FileName);
// FIXME: This should be the page size of the system running profiling.
// However such info isn't available at post-processing time, assuming

@@ -311,7 +315,8 @@ void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj,
exitWithError("no executable segment found", FileName);
}

void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFObjectFileBase *Obj) {
void ProfiledBinary::setPreferredTextSegmentAddresses(
const ELFObjectFileBase *Obj) {
if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))

@@ -324,10 +329,38 @@ void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFObjectFileBase *O
llvm_unreachable("invalid ELF object format");
}

void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
void ProfiledBinary::checkPseudoProbe(const ELFObjectFileBase *Obj) {
if (UseDwarfCorrelation)
return;

bool HasProbeDescSection = false;
bool HasPseudoProbeSection = false;

StringRef FileName = Obj->getFileName();
for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
SI != SE; ++SI) {
const SectionRef &Section = *SI;
StringRef SectionName = unwrapOrError(Section.getName(), FileName);
if (SectionName == ".pseudo_probe_desc") {
HasProbeDescSection = true;
} else if (SectionName == ".pseudo_probe") {
HasPseudoProbeSection = true;
}
}

// set UsePseudoProbes flag, used for PerfReader
UsePseudoProbes = HasProbeDescSection && HasPseudoProbeSection;
}

void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
if (!UsePseudoProbes)
return;

std::unordered_set<uint64_t> ProfiledGuids;
if (!ShowDisassemblyOnly)
for (auto *F : ProfiledFunctions)
ProfiledGuids.insert(Function::getGUID(F->FuncName));

StringRef FileName = Obj->getFileName();
for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
SI != SE; ++SI) {

@@ -339,21 +372,20 @@ void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
if (!ProbeDecoder.buildGUID2FuncDescMap(
reinterpret_cast<const uint8_t *>(Contents.data()),
Contents.size()))
exitWithError("Pseudo Probe decoder fail in .pseudo_probe_desc section");
exitWithError(
"Pseudo Probe decoder fail in .pseudo_probe_desc section");
} else if (SectionName == ".pseudo_probe") {
StringRef Contents = unwrapOrError(Section.getContents(), FileName);
if (!ProbeDecoder.buildAddress2ProbeMap(
reinterpret_cast<const uint8_t *>(Contents.data()),
Contents.size()))
Contents.size(), ProfiledGuids))
exitWithError("Pseudo Probe decoder fail in .pseudo_probe section");
// set UsePseudoProbes flag, used for PerfReader
UsePseudoProbes = true;
}
}

// Build TopLevelProbeFrameMap to track size for optimized inlinees when probe
// is available
if (UsePseudoProbes && TrackFuncContextSize) {
if (TrackFuncContextSize) {
for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) {
auto *Frame = Child.second.get();
StringRef FuncName =

@@ -366,6 +398,13 @@ void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
ProbeDecoder.printGUID2FuncDescMap(outs());
}

void ProfiledBinary::decodePseudoProbe() {
OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
Binary &ExeBinary = *OBinary.getBinary();
auto *Obj = dyn_cast<ELFObjectFileBase>(&ExeBinary);
decodePseudoProbe(Obj);
}

void ProfiledBinary::setIsFuncEntry(uint64_t Offset, StringRef RangeSymName) {
// Note that the start offset of each ELF section can be a non-function
// symbol, we need to binary search for the start of a real function range.
@@ -218,6 +218,9 @@ class ProfiledBinary {
// A map of mapping function name to BinaryFunction info.
std::unordered_map<std::string, BinaryFunction> BinaryFunctions;

// A list of binary functions that have samples.
std::unordered_set<const BinaryFunction *> ProfiledFunctions;

// An ordered map of mapping function's start offset to function range
// relevant info. Currently to determine if the offset of ELF is the start of
// a real function, we leverage the function range info from DWARF.

@@ -278,6 +281,8 @@ class ProfiledBinary {
template <class ELFT>
void setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj, StringRef FileName);

void checkPseudoProbe(const ELFObjectFileBase *Obj);

void decodePseudoProbe(const ELFObjectFileBase *Obj);

void

@@ -331,6 +336,9 @@ public:
setupSymbolizer();
load();
}

void decodePseudoProbe();

uint64_t virtualAddrToOffset(uint64_t VirtualAddress) const {
return VirtualAddress - BaseAddress;
}

@@ -453,6 +461,14 @@ public:
return BinaryFunctions;
}

std::unordered_set<const BinaryFunction *> &getProfiledFunctions() {
return ProfiledFunctions;
}

void setProfiledFunctions(std::unordered_set<const BinaryFunction *> &Funcs) {
ProfiledFunctions = Funcs;
}

BinaryFunction *getBinaryFunction(StringRef FName) {
auto I = BinaryFunctions.find(FName.str());
if (I == BinaryFunctions.end())