forked from OSchip/llvm-project
[llvm-profgen] On-demand symbolization
Previously we did symbolization for all the functions, but we actually only need the symbols that are hit by the samples. This can significantly speed up processing for large binaries. Optimization for the per-inliner will come along with the next patch. Reviewed By: hoy, wenlei Differential Revision: https://reviews.llvm.org/D110465
This commit is contained in:
parent
70391b3468
commit
091c16f76b
|
@ -388,8 +388,8 @@ using ContextSampleCounterMap =
|
|||
|
||||
struct FrameStack {
|
||||
SmallVector<uint64_t, 16> Stack;
|
||||
const ProfiledBinary *Binary;
|
||||
FrameStack(const ProfiledBinary *B) : Binary(B) {}
|
||||
ProfiledBinary *Binary;
|
||||
FrameStack(ProfiledBinary *B) : Binary(B) {}
|
||||
bool pushFrame(UnwindState::ProfiledFrame *Cur) {
|
||||
Stack.push_back(Cur->Address);
|
||||
return true;
|
||||
|
@ -404,8 +404,8 @@ struct FrameStack {
|
|||
|
||||
struct ProbeStack {
|
||||
SmallVector<const MCDecodedPseudoProbe *, 16> Stack;
|
||||
const ProfiledBinary *Binary;
|
||||
ProbeStack(const ProfiledBinary *B) : Binary(B) {}
|
||||
ProfiledBinary *Binary;
|
||||
ProbeStack(ProfiledBinary *B) : Binary(B) {}
|
||||
bool pushFrame(UnwindState::ProfiledFrame *Cur) {
|
||||
const MCDecodedPseudoProbe *CallProbe =
|
||||
Binary->getCallProbeForAddr(Cur->Address);
|
||||
|
@ -458,7 +458,7 @@ range as sample counter for further CS profile generation.
|
|||
*/
|
||||
class VirtualUnwinder {
|
||||
public:
|
||||
VirtualUnwinder(ContextSampleCounterMap *Counter, const ProfiledBinary *B)
|
||||
VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B)
|
||||
: CtxCounterMap(Counter), Binary(B) {}
|
||||
bool unwind(const PerfSample *Sample, uint64_t Repeat);
|
||||
std::set<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites; }
|
||||
|
@ -495,7 +495,7 @@ private:
|
|||
|
||||
ContextSampleCounterMap *CtxCounterMap;
|
||||
// Profiled binary that current frame address belongs to
|
||||
const ProfiledBinary *Binary;
|
||||
ProfiledBinary *Binary;
|
||||
// Keep track of all untracked callsites
|
||||
std::set<uint64_t> UntrackedCallsites;
|
||||
};
|
||||
|
|
|
@ -184,8 +184,7 @@ void ProfiledBinary::load() {
|
|||
// TODO: decode other sections.
|
||||
}
|
||||
|
||||
bool ProfiledBinary::inlineContextEqual(uint64_t Address1,
|
||||
uint64_t Address2) const {
|
||||
bool ProfiledBinary::inlineContextEqual(uint64_t Address1, uint64_t Address2) {
|
||||
uint64_t Offset1 = virtualAddrToOffset(Address1);
|
||||
uint64_t Offset2 = virtualAddrToOffset(Address2);
|
||||
const SampleContextFrameVector &Context1 = getFrameLocationStack(Offset1);
|
||||
|
@ -202,7 +201,7 @@ bool ProfiledBinary::inlineContextEqual(uint64_t Address1,
|
|||
|
||||
SampleContextFrameVector
|
||||
ProfiledBinary::getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
|
||||
bool &WasLeafInlined) const {
|
||||
bool &WasLeafInlined) {
|
||||
SampleContextFrameVector ContextVec;
|
||||
// Process from frame root to leaf
|
||||
for (auto Address : Stack) {
|
||||
|
@ -358,7 +357,7 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
|
|||
// We don't need symbolized info for probe-based profile, just use an
|
||||
// empty stack as an entry to indicate a valid binary offset
|
||||
SampleContextFrameVector SymbolizedCallStack;
|
||||
if (!UsePseudoProbes || TrackFuncContextSize) {
|
||||
if (TrackFuncContextSize) {
|
||||
InstructionPointer IP(this, Offset);
|
||||
// TODO: reallocation of Offset2LocStackMap will lead to dangling
|
||||
// strings We need ProfiledBinary to owned these string.
|
||||
|
@ -369,9 +368,9 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
|
|||
if (TrackFuncContextSize && !SymbolizedCallStack.empty())
|
||||
FuncSizeTracker.addInstructionForContext(Offset2LocStackMap[Offset],
|
||||
Size);
|
||||
} else {
|
||||
Offset2LocStackMap[Offset] = SampleContextFrameVector();
|
||||
}
|
||||
// Record instruction size.
|
||||
Offset2InstSizeMap[Offset] = Size;
|
||||
|
||||
// Populate address maps.
|
||||
CodeAddrs.push_back(Offset);
|
||||
|
|
|
@ -169,6 +169,10 @@ class ProfiledBinary {
|
|||
std::map<uint64_t, std::pair<std::string, uint64_t>> FuncStartOffsetMap;
|
||||
// Offset to context location map. Used to expand the context.
|
||||
std::unordered_map<uint64_t, SampleContextFrameVector> Offset2LocStackMap;
|
||||
|
||||
// Offset to instruction size map. Also used for quick offset lookup.
|
||||
std::unordered_map<uint64_t, uint64_t> Offset2InstSizeMap;
|
||||
|
||||
// An array of offsets of all instructions sorted in increasing order. The
|
||||
// sorting is needed to fast advance to the next forward/backward instruction.
|
||||
std::vector<uint64_t> CodeAddrs;
|
||||
|
@ -269,7 +273,7 @@ public:
|
|||
|
||||
bool addressIsCode(uint64_t Address) const {
|
||||
uint64_t Offset = virtualAddrToOffset(Address);
|
||||
return Offset2LocStackMap.find(Offset) != Offset2LocStackMap.end();
|
||||
return Offset2InstSizeMap.find(Offset) != Offset2InstSizeMap.end();
|
||||
}
|
||||
bool addressIsCall(uint64_t Address) const {
|
||||
uint64_t Offset = virtualAddrToOffset(Address);
|
||||
|
@ -326,11 +330,14 @@ public:
|
|||
return FuncSizeTracker.getFuncSizeForContext(Context);
|
||||
}
|
||||
|
||||
const SampleContextFrameVector &getFrameLocationStack(uint64_t Offset) const {
|
||||
auto I = Offset2LocStackMap.find(Offset);
|
||||
assert(I != Offset2LocStackMap.end() &&
|
||||
"Can't find location for offset in the binary");
|
||||
return I->second;
|
||||
const SampleContextFrameVector &
|
||||
getFrameLocationStack(uint64_t Offset, bool UseProbeDiscriminator = false) {
|
||||
auto I = Offset2LocStackMap.emplace(Offset, SampleContextFrameVector());
|
||||
if (I.second) {
|
||||
InstructionPointer IP(this, Offset);
|
||||
I.first->second = symbolize(IP, true, UseProbeDiscriminator);
|
||||
}
|
||||
return I.first->second;
|
||||
}
|
||||
|
||||
Optional<SampleContextFrame> getInlineLeafFrameLoc(uint64_t Offset) {
|
||||
|
@ -341,14 +348,14 @@ public:
|
|||
}
|
||||
|
||||
// Compare two addresses' inline context
|
||||
bool inlineContextEqual(uint64_t Add1, uint64_t Add2) const;
|
||||
bool inlineContextEqual(uint64_t Add1, uint64_t Add2);
|
||||
|
||||
// Get the full context of the current stack with inline context filled in.
|
||||
// It will search the disassembling info stored in Offset2LocStackMap. This is
|
||||
// used as the key of function sample map
|
||||
SampleContextFrameVector
|
||||
getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
|
||||
bool &WasLeafInlined) const;
|
||||
bool &WasLeafInlined);
|
||||
|
||||
const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const {
|
||||
return ProbeDecoder.getCallProbeForAddr(Address);
|
||||
|
|
Loading…
Reference in New Issue