diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h
index 25fa44284734..1a8a2a3014a9 100644
--- a/llvm/tools/llvm-profgen/PerfReader.h
+++ b/llvm/tools/llvm-profgen/PerfReader.h
@@ -388,8 +388,8 @@ using ContextSampleCounterMap =
 struct FrameStack {
   SmallVector<uint64_t, 16> Stack;
-  const ProfiledBinary *Binary;
-  FrameStack(const ProfiledBinary *B) : Binary(B) {}
+  ProfiledBinary *Binary;
+  FrameStack(ProfiledBinary *B) : Binary(B) {}
   bool pushFrame(UnwindState::ProfiledFrame *Cur) {
     Stack.push_back(Cur->Address);
     return true;
   }
@@ -404,8 +404,8 @@
 
 struct ProbeStack {
   SmallVector<const MCDecodedPseudoProbe *, 16> Stack;
-  const ProfiledBinary *Binary;
-  ProbeStack(const ProfiledBinary *B) : Binary(B) {}
+  ProfiledBinary *Binary;
+  ProbeStack(ProfiledBinary *B) : Binary(B) {}
   bool pushFrame(UnwindState::ProfiledFrame *Cur) {
     const MCDecodedPseudoProbe *CallProbe =
         Binary->getCallProbeForAddr(Cur->Address);
@@ -458,7 +458,7 @@ range as sample counter for further CS profile generation.
 */
 class VirtualUnwinder {
 public:
-  VirtualUnwinder(ContextSampleCounterMap *Counter, const ProfiledBinary *B)
+  VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B)
       : CtxCounterMap(Counter), Binary(B) {}
   bool unwind(const PerfSample *Sample, uint64_t Repeat);
   std::set<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites; }
@@ -495,7 +495,7 @@ private:
   ContextSampleCounterMap *CtxCounterMap;
   // Profiled binary that current frame address belongs to
-  const ProfiledBinary *Binary;
+  ProfiledBinary *Binary;
   // Keep track of all untracked callsites
   std::set<uint64_t> UntrackedCallsites;
 };
 
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index b04150842fb0..1e8da1550005 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -184,8 +184,7 @@ void ProfiledBinary::load() {
   // TODO: decode other sections.
 }
 
-bool ProfiledBinary::inlineContextEqual(uint64_t Address1,
-                                        uint64_t Address2) const {
+bool ProfiledBinary::inlineContextEqual(uint64_t Address1, uint64_t Address2) {
   uint64_t Offset1 = virtualAddrToOffset(Address1);
   uint64_t Offset2 = virtualAddrToOffset(Address2);
   const SampleContextFrameVector &Context1 = getFrameLocationStack(Offset1);
@@ -202,7 +201,7 @@ bool ProfiledBinary::inlineContextEqual(uint64_t Address1,
 
 SampleContextFrameVector
 ProfiledBinary::getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
-                                   bool &WasLeafInlined) const {
+                                   bool &WasLeafInlined) {
   SampleContextFrameVector ContextVec;
   // Process from frame root to leaf
   for (auto Address : Stack) {
@@ -358,7 +357,7 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
       // We don't need symbolized info for probe-based profile, just use an
       // empty stack as an entry to indicate a valid binary offset
       SampleContextFrameVector SymbolizedCallStack;
-      if (!UsePseudoProbes || TrackFuncContextSize) {
+      if (TrackFuncContextSize) {
         InstructionPointer IP(this, Offset);
         // TODO: reallocation of Offset2LocStackMap will lead to dangling
         // strings We need ProfiledBinary to owned these string.
@@ -369,9 +368,9 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
       if (TrackFuncContextSize && !SymbolizedCallStack.empty())
         FuncSizeTracker.addInstructionForContext(Offset2LocStackMap[Offset],
                                                  Size);
-      } else {
-        Offset2LocStackMap[Offset] = SampleContextFrameVector();
       }
+      // Record instruction size.
+      Offset2InstSizeMap[Offset] = Size;
 
       // Populate address maps.
       CodeAddrs.push_back(Offset);
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 01962d9d38e5..fd719ef443e6 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -169,6 +169,10 @@ class ProfiledBinary {
   std::map<uint64_t, std::pair<std::string, uint64_t>> FuncStartOffsetMap;
   // Offset to context location map. Used to expand the context.
   std::unordered_map<uint64_t, SampleContextFrameVector> Offset2LocStackMap;
+
+  // Offset to instruction size map. Also used for quick offset lookup.
+  std::unordered_map<uint64_t, uint64_t> Offset2InstSizeMap;
+
   // An array of offsets of all instructions sorted in increasing order. The
   // sorting is needed to fast advance to the next forward/backward instruction.
   std::vector<uint64_t> CodeAddrs;
@@ -269,7 +273,7 @@
 
   bool addressIsCode(uint64_t Address) const {
     uint64_t Offset = virtualAddrToOffset(Address);
-    return Offset2LocStackMap.find(Offset) != Offset2LocStackMap.end();
+    return Offset2InstSizeMap.find(Offset) != Offset2InstSizeMap.end();
   }
   bool addressIsCall(uint64_t Address) const {
     uint64_t Offset = virtualAddrToOffset(Address);
@@ -326,11 +330,14 @@ public:
     return FuncSizeTracker.getFuncSizeForContext(Context);
   }
 
-  const SampleContextFrameVector &getFrameLocationStack(uint64_t Offset) const {
-    auto I = Offset2LocStackMap.find(Offset);
-    assert(I != Offset2LocStackMap.end() &&
-           "Can't find location for offset in the binary");
-    return I->second;
+  const SampleContextFrameVector &
+  getFrameLocationStack(uint64_t Offset, bool UseProbeDiscriminator = false) {
+    auto I = Offset2LocStackMap.emplace(Offset, SampleContextFrameVector());
+    if (I.second) {
+      InstructionPointer IP(this, Offset);
+      I.first->second = symbolize(IP, true, UseProbeDiscriminator);
+    }
+    return I.first->second;
   }
 
   Optional<SampleContextFrame> getInlineLeafFrameLoc(uint64_t Offset) {
@@ -341,14 +348,14 @@
   }
 
  // Compare two addresses' inline context
-  bool inlineContextEqual(uint64_t Add1, uint64_t Add2) const;
+  bool inlineContextEqual(uint64_t Add1, uint64_t Add2);
 
  // Get the full context of the current stack with inline context filled in.
  // It will search the disassembling info stored in Offset2LocStackMap. This is
  // used as the key of function sample map
   SampleContextFrameVector
   getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
-                     bool &WasLeafInlined) const;
+                     bool &WasLeafInlined);
 
   const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const {
     return ProbeDecoder.getCallProbeForAddr(Address);