[llvm-profgen] On-demand symbolization

Previously we symbolized all functions up front, even though we only need symbols for the functions that are actually hit by samples. With this change, symbolization is done on demand, the first time a sampled offset is queried.

This can significantly speed up llvm-profgen on large binaries.

Optimization for the pre-inliner will come in a follow-up patch.

Reviewed By: hoy, wenlei

Differential Revision: https://reviews.llvm.org/D110465

Author: wlei
Date:   2021-09-24 17:06:39 -07:00
Commit: 091c16f76b (parent: 70391b3468)
3 changed files with 26 additions and 20 deletions
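
The core of the change shows up in ProfiledBinary.h below: getFrameLocationStack now fills Offset2LocStackMap lazily, on the first query of an offset, instead of asserting that disassembly pre-populated it. Below is a minimal standalone sketch of that caching pattern; LazySymbolizer, FrameVector, and symbolizeAt are hypothetical stand-ins for the real ProfiledBinary, SampleContextFrameVector, and symbolize(), not llvm-profgen's actual API.

#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

// Hypothetical stand-in for a symbolized inline frame stack
// (the real code uses SampleContextFrameVector).
using FrameVector = std::vector<std::string>;

struct LazySymbolizer {
  // Populated only for offsets that samples actually query.
  std::unordered_map<uint64_t, FrameVector> Cache;

  // Stand-in for the expensive debug-info lookup done by symbolize().
  FrameVector symbolizeAt(uint64_t Offset) {
    return {"func_at_" + std::to_string(Offset)};
  }

  const FrameVector &getFrames(uint64_t Offset) {
    // emplace returns {iterator, inserted}; symbolization runs only on
    // first insertion, every later call is a plain hash lookup.
    auto I = Cache.emplace(Offset, FrameVector());
    if (I.second)
      I.first->second = symbolizeAt(Offset);
    return I.first->second;
  }
};

Because the accessor mutates the cache on first use, it can no longer be const, which is why the patch also drops the const qualifiers from the ProfiledBinary pointers in FrameStack, ProbeStack, and VirtualUnwinder, and from inlineContextEqual and getExpandedContext.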

llvm/tools/llvm-profgen/PerfReader.h

@@ -388,8 +388,8 @@ using ContextSampleCounterMap =
 struct FrameStack {
   SmallVector<uint64_t, 16> Stack;
-  const ProfiledBinary *Binary;
-  FrameStack(const ProfiledBinary *B) : Binary(B) {}
+  ProfiledBinary *Binary;
+  FrameStack(ProfiledBinary *B) : Binary(B) {}
   bool pushFrame(UnwindState::ProfiledFrame *Cur) {
     Stack.push_back(Cur->Address);
     return true;
@@ -404,8 +404,8 @@ struct FrameStack
 struct ProbeStack {
   SmallVector<const MCDecodedPseudoProbe *, 16> Stack;
-  const ProfiledBinary *Binary;
-  ProbeStack(const ProfiledBinary *B) : Binary(B) {}
+  ProfiledBinary *Binary;
+  ProbeStack(ProfiledBinary *B) : Binary(B) {}
   bool pushFrame(UnwindState::ProfiledFrame *Cur) {
     const MCDecodedPseudoProbe *CallProbe =
         Binary->getCallProbeForAddr(Cur->Address);
@@ -458,7 +458,7 @@ range as sample counter for further CS profile generation.
 */
 class VirtualUnwinder {
 public:
-  VirtualUnwinder(ContextSampleCounterMap *Counter, const ProfiledBinary *B)
+  VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B)
       : CtxCounterMap(Counter), Binary(B) {}
   bool unwind(const PerfSample *Sample, uint64_t Repeat);
   std::set<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites; }
@@ -495,7 +495,7 @@ private:
   ContextSampleCounterMap *CtxCounterMap;
   // Profiled binary that current frame address belongs to
-  const ProfiledBinary *Binary;
+  ProfiledBinary *Binary;
   // Keep track of all untracked callsites
   std::set<uint64_t> UntrackedCallsites;
 };

llvm/tools/llvm-profgen/ProfiledBinary.cpp

@@ -184,8 +184,7 @@ void ProfiledBinary::load() {
   // TODO: decode other sections.
 }
 
-bool ProfiledBinary::inlineContextEqual(uint64_t Address1,
-                                        uint64_t Address2) const {
+bool ProfiledBinary::inlineContextEqual(uint64_t Address1, uint64_t Address2) {
   uint64_t Offset1 = virtualAddrToOffset(Address1);
   uint64_t Offset2 = virtualAddrToOffset(Address2);
   const SampleContextFrameVector &Context1 = getFrameLocationStack(Offset1);
@@ -202,7 +201,7 @@ bool ProfiledBinary::inlineContextEqual(uint64_t Address1,
 SampleContextFrameVector
 ProfiledBinary::getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
-                                   bool &WasLeafInlined) const {
+                                   bool &WasLeafInlined) {
   SampleContextFrameVector ContextVec;
   // Process from frame root to leaf
   for (auto Address : Stack) {
@@ -358,7 +357,7 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
   // We don't need symbolized info for probe-based profile, just use an
   // empty stack as an entry to indicate a valid binary offset
   SampleContextFrameVector SymbolizedCallStack;
-  if (!UsePseudoProbes || TrackFuncContextSize) {
+  if (TrackFuncContextSize) {
     InstructionPointer IP(this, Offset);
     // TODO: reallocation of Offset2LocStackMap will lead to dangling
     // strings We need ProfiledBinary to owned these string.
@@ -369,9 +368,9 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
     if (TrackFuncContextSize && !SymbolizedCallStack.empty())
       FuncSizeTracker.addInstructionForContext(Offset2LocStackMap[Offset],
                                                Size);
-  } else {
-    Offset2LocStackMap[Offset] = SampleContextFrameVector();
   }
+  // Record instruction size.
+  Offset2InstSizeMap[Offset] = Size;
   // Populate address maps.
   CodeAddrs.push_back(Offset);

llvm/tools/llvm-profgen/ProfiledBinary.h

@@ -169,6 +169,10 @@ class ProfiledBinary {
   std::map<uint64_t, std::pair<std::string, uint64_t>> FuncStartOffsetMap;
   // Offset to context location map. Used to expand the context.
   std::unordered_map<uint64_t, SampleContextFrameVector> Offset2LocStackMap;
+  // Offset to instruction size map. Also used for quick offset lookup.
+  std::unordered_map<uint64_t, uint64_t> Offset2InstSizeMap;
   // An array of offsets of all instructions sorted in increasing order. The
   // sorting is needed to fast advance to the next forward/backward instruction.
   std::vector<uint64_t> CodeAddrs;
@@ -269,7 +273,7 @@ public:
   bool addressIsCode(uint64_t Address) const {
     uint64_t Offset = virtualAddrToOffset(Address);
-    return Offset2LocStackMap.find(Offset) != Offset2LocStackMap.end();
+    return Offset2InstSizeMap.find(Offset) != Offset2InstSizeMap.end();
   }
   bool addressIsCall(uint64_t Address) const {
     uint64_t Offset = virtualAddrToOffset(Address);
@@ -326,11 +330,14 @@ public:
     return FuncSizeTracker.getFuncSizeForContext(Context);
   }
-  const SampleContextFrameVector &getFrameLocationStack(uint64_t Offset) const {
-    auto I = Offset2LocStackMap.find(Offset);
-    assert(I != Offset2LocStackMap.end() &&
-           "Can't find location for offset in the binary");
-    return I->second;
+  const SampleContextFrameVector &
+  getFrameLocationStack(uint64_t Offset, bool UseProbeDiscriminator = false) {
+    auto I = Offset2LocStackMap.emplace(Offset, SampleContextFrameVector());
+    if (I.second) {
+      InstructionPointer IP(this, Offset);
+      I.first->second = symbolize(IP, true, UseProbeDiscriminator);
+    }
+    return I.first->second;
   }
   Optional<SampleContextFrame> getInlineLeafFrameLoc(uint64_t Offset) {
@@ -341,14 +348,14 @@ public:
   }
   // Compare two addresses' inline context
-  bool inlineContextEqual(uint64_t Add1, uint64_t Add2) const;
+  bool inlineContextEqual(uint64_t Add1, uint64_t Add2);
   // Get the full context of the current stack with inline context filled in.
   // It will search the disassembling info stored in Offset2LocStackMap. This is
   // used as the key of function sample map
   SampleContextFrameVector
   getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
-                     bool &WasLeafInlined) const;
+                     bool &WasLeafInlined);
   const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const {
     return ProbeDecoder.getCallProbeForAddr(Address);