forked from OSchip/llvm-project
[llvm-profgen] On-demand symbolization
Previously we did symbolization for all the functions, but we actually only need the symbols that are hit by the samples. This can significantly speed up processing for large binaries. Optimization for the per-inliner will come along with the next patch. Reviewed By: hoy, wenlei Differential Revision: https://reviews.llvm.org/D110465
This commit is contained in:
parent
70391b3468
commit
091c16f76b
|
@ -388,8 +388,8 @@ using ContextSampleCounterMap =
|
|||
|
||||
struct FrameStack {
|
||||
SmallVector<uint64_t, 16> Stack;
|
||||
const ProfiledBinary *Binary;
|
||||
FrameStack(const ProfiledBinary *B) : Binary(B) {}
|
||||
ProfiledBinary *Binary;
|
||||
FrameStack(ProfiledBinary *B) : Binary(B) {}
|
||||
bool pushFrame(UnwindState::ProfiledFrame *Cur) {
|
||||
Stack.push_back(Cur->Address);
|
||||
return true;
|
||||
|
@ -404,8 +404,8 @@ struct FrameStack {
|
|||
|
||||
struct ProbeStack {
|
||||
SmallVector<const MCDecodedPseudoProbe *, 16> Stack;
|
||||
const ProfiledBinary *Binary;
|
||||
ProbeStack(const ProfiledBinary *B) : Binary(B) {}
|
||||
ProfiledBinary *Binary;
|
||||
ProbeStack(ProfiledBinary *B) : Binary(B) {}
|
||||
bool pushFrame(UnwindState::ProfiledFrame *Cur) {
|
||||
const MCDecodedPseudoProbe *CallProbe =
|
||||
Binary->getCallProbeForAddr(Cur->Address);
|
||||
|
@ -458,7 +458,7 @@ range as sample counter for further CS profile generation.
|
|||
*/
|
||||
class VirtualUnwinder {
|
||||
public:
|
||||
VirtualUnwinder(ContextSampleCounterMap *Counter, const ProfiledBinary *B)
|
||||
VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B)
|
||||
: CtxCounterMap(Counter), Binary(B) {}
|
||||
bool unwind(const PerfSample *Sample, uint64_t Repeat);
|
||||
std::set<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites; }
|
||||
|
@ -495,7 +495,7 @@ private:
|
|||
|
||||
ContextSampleCounterMap *CtxCounterMap;
|
||||
// Profiled binary that current frame address belongs to
|
||||
const ProfiledBinary *Binary;
|
||||
ProfiledBinary *Binary;
|
||||
// Keep track of all untracked callsites
|
||||
std::set<uint64_t> UntrackedCallsites;
|
||||
};
|
||||
|
|
|
@ -184,8 +184,7 @@ void ProfiledBinary::load() {
|
|||
// TODO: decode other sections.
|
||||
}
|
||||
|
||||
bool ProfiledBinary::inlineContextEqual(uint64_t Address1,
|
||||
uint64_t Address2) const {
|
||||
bool ProfiledBinary::inlineContextEqual(uint64_t Address1, uint64_t Address2) {
|
||||
uint64_t Offset1 = virtualAddrToOffset(Address1);
|
||||
uint64_t Offset2 = virtualAddrToOffset(Address2);
|
||||
const SampleContextFrameVector &Context1 = getFrameLocationStack(Offset1);
|
||||
|
@ -202,7 +201,7 @@ bool ProfiledBinary::inlineContextEqual(uint64_t Address1,
|
|||
|
||||
SampleContextFrameVector
|
||||
ProfiledBinary::getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
|
||||
bool &WasLeafInlined) const {
|
||||
bool &WasLeafInlined) {
|
||||
SampleContextFrameVector ContextVec;
|
||||
// Process from frame root to leaf
|
||||
for (auto Address : Stack) {
|
||||
|
@ -358,7 +357,7 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
|
|||
// We don't need symbolized info for probe-based profile, just use an
|
||||
// empty stack as an entry to indicate a valid binary offset
|
||||
SampleContextFrameVector SymbolizedCallStack;
|
||||
if (!UsePseudoProbes || TrackFuncContextSize) {
|
||||
if (TrackFuncContextSize) {
|
||||
InstructionPointer IP(this, Offset);
|
||||
// TODO: reallocation of Offset2LocStackMap will lead to dangling
|
||||
// strings We need ProfiledBinary to owned these string.
|
||||
|
@ -369,9 +368,9 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
|
|||
if (TrackFuncContextSize && !SymbolizedCallStack.empty())
|
||||
FuncSizeTracker.addInstructionForContext(Offset2LocStackMap[Offset],
|
||||
Size);
|
||||
} else {
|
||||
Offset2LocStackMap[Offset] = SampleContextFrameVector();
|
||||
}
|
||||
// Record instruction size.
|
||||
Offset2InstSizeMap[Offset] = Size;
|
||||
|
||||
// Populate address maps.
|
||||
CodeAddrs.push_back(Offset);
|
||||
|
|
|
@ -169,6 +169,10 @@ class ProfiledBinary {
|
|||
std::map<uint64_t, std::pair<std::string, uint64_t>> FuncStartOffsetMap;
|
||||
// Offset to context location map. Used to expand the context.
|
||||
std::unordered_map<uint64_t, SampleContextFrameVector> Offset2LocStackMap;
|
||||
|
||||
// Offset to instruction size map. Also used for quick offset lookup.
|
||||
std::unordered_map<uint64_t, uint64_t> Offset2InstSizeMap;
|
||||
|
||||
// An array of offsets of all instructions sorted in increasing order. The
|
||||
// sorting is needed to fast advance to the next forward/backward instruction.
|
||||
std::vector<uint64_t> CodeAddrs;
|
||||
|
@ -269,7 +273,7 @@ public:
|
|||
|
||||
bool addressIsCode(uint64_t Address) const {
|
||||
uint64_t Offset = virtualAddrToOffset(Address);
|
||||
return Offset2LocStackMap.find(Offset) != Offset2LocStackMap.end();
|
||||
return Offset2InstSizeMap.find(Offset) != Offset2InstSizeMap.end();
|
||||
}
|
||||
bool addressIsCall(uint64_t Address) const {
|
||||
uint64_t Offset = virtualAddrToOffset(Address);
|
||||
|
@ -326,11 +330,14 @@ public:
|
|||
return FuncSizeTracker.getFuncSizeForContext(Context);
|
||||
}
|
||||
|
||||
const SampleContextFrameVector &getFrameLocationStack(uint64_t Offset) const {
|
||||
auto I = Offset2LocStackMap.find(Offset);
|
||||
assert(I != Offset2LocStackMap.end() &&
|
||||
"Can't find location for offset in the binary");
|
||||
return I->second;
|
||||
const SampleContextFrameVector &
|
||||
getFrameLocationStack(uint64_t Offset, bool UseProbeDiscriminator = false) {
|
||||
auto I = Offset2LocStackMap.emplace(Offset, SampleContextFrameVector());
|
||||
if (I.second) {
|
||||
InstructionPointer IP(this, Offset);
|
||||
I.first->second = symbolize(IP, true, UseProbeDiscriminator);
|
||||
}
|
||||
return I.first->second;
|
||||
}
|
||||
|
||||
Optional<SampleContextFrame> getInlineLeafFrameLoc(uint64_t Offset) {
|
||||
|
@ -341,14 +348,14 @@ public:
|
|||
}
|
||||
|
||||
// Compare two addresses' inline context
|
||||
bool inlineContextEqual(uint64_t Add1, uint64_t Add2) const;
|
||||
bool inlineContextEqual(uint64_t Add1, uint64_t Add2);
|
||||
|
||||
// Get the full context of the current stack with inline context filled in.
|
||||
// It will search the disassembling info stored in Offset2LocStackMap. This is
|
||||
// used as the key of function sample map
|
||||
SampleContextFrameVector
|
||||
getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
|
||||
bool &WasLeafInlined) const;
|
||||
bool &WasLeafInlined);
|
||||
|
||||
const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const {
|
||||
return ProbeDecoder.getCallProbeForAddr(Address);
|
||||
|
|
Loading…
Reference in New Issue