forked from OSchip/llvm-project
[BOLT] Speedup ICF by better function hashing
Summary: Too many hash collisions may cause ICF to run slowly. We used to hash BinaryFunction by looking only at instruction opcodes, ignoring instruction operands. With many almost identical functions, such an approach may lead to long ICF processing times. By including operands in the hash, we reduce the number of collisions and often improve the runtime by a factor of 2 or more. (cherry picked from FBD20888957)
This commit is contained in:
parent
abda7dc6a7
commit
ee0371ad97
|
@ -421,7 +421,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
|
|||
}
|
||||
}
|
||||
if (hasCFG()) {
|
||||
OS << "\n Hash : " << Twine::utohexstr(hash());
|
||||
OS << "\n Hash : " << Twine::utohexstr(computeHash());
|
||||
}
|
||||
if (FrameInstructions.size()) {
|
||||
OS << "\n CFI Instrs : " << FrameInstructions.size();
|
||||
|
@ -3197,20 +3197,18 @@ BinaryFunction::BasicBlockOrderType BinaryFunction::dfs() const {
|
|||
return DFS;
|
||||
}
|
||||
|
||||
std::size_t BinaryFunction::hash(bool Recompute, bool UseDFS) const {
|
||||
size_t BinaryFunction::computeHash(bool UseDFS,
|
||||
OperandHashFuncTy OperandHashFunc) const {
|
||||
if (size() == 0)
|
||||
return 0;
|
||||
|
||||
assert(hasCFG() && "function is expected to have CFG");
|
||||
|
||||
if (!Recompute)
|
||||
return Hash;
|
||||
|
||||
const auto &Order = UseDFS ? dfs() : BasicBlocksLayout;
|
||||
|
||||
// The hash is computed by creating a string of all the opcodes
|
||||
// in the function and hashing that string with std::hash.
|
||||
std::string Opcodes;
|
||||
// The hash is computed by creating a string of all instruction opcodes and
|
||||
// possibly their operands and then hashing that string with std::hash.
|
||||
std::string HashString;
|
||||
for (const auto *BB : Order) {
|
||||
for (const auto &Inst : *BB) {
|
||||
unsigned Opcode = Inst.getOpcode();
|
||||
|
@ -3224,20 +3222,22 @@ std::size_t BinaryFunction::hash(bool Recompute, bool UseDFS) const {
|
|||
if (BC.MIB->isUnconditionalBranch(Inst))
|
||||
continue;
|
||||
|
||||
if (Opcode == 0) {
|
||||
Opcodes.push_back(0);
|
||||
continue;
|
||||
}
|
||||
if (Opcode == 0)
|
||||
HashString.push_back(0);
|
||||
|
||||
while (Opcode) {
|
||||
uint8_t LSB = Opcode & 0xff;
|
||||
Opcodes.push_back(LSB);
|
||||
HashString.push_back(LSB);
|
||||
Opcode = Opcode >> 8;
|
||||
}
|
||||
|
||||
for (int I = 0, E = MCPlus::getNumPrimeOperands(Inst); I != E; ++I) {
|
||||
HashString.append(OperandHashFunc(Inst.getOperand(I)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Hash = std::hash<std::string>{}(Opcodes);
|
||||
return Hash = std::hash<std::string>{}(HashString);
|
||||
}
|
||||
|
||||
void BinaryFunction::insertBasicBlocks(
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <limits>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
|
@ -349,7 +350,8 @@ private:
|
|||
/// is referenced by UnitLineTable.
|
||||
DWARFUnitLineTable UnitLineTable{nullptr, nullptr};
|
||||
|
||||
/// Last computed hash value.
|
||||
/// Last computed hash value. Note that the value could be recomputed using
|
||||
/// different parameters by every pass.
|
||||
mutable uint64_t Hash{0};
|
||||
|
||||
/// For PLT functions it contains a symbol associated with a function
|
||||
|
@ -2243,13 +2245,25 @@ public:
|
|||
/// Convert function-level branch data into instruction annotations.
|
||||
void convertBranchData();
|
||||
|
||||
/// Returns a hash value for the function. To be used for ICF. Two congruent
|
||||
/// functions (functions with different symbolic references but identical
|
||||
/// otherwise) are required to have identical hashes.
|
||||
/// Returns the last computed hash value of the function.
|
||||
size_t getHash() const {
|
||||
return Hash;
|
||||
}
|
||||
|
||||
using OperandHashFuncTy =
|
||||
function_ref<typename std::string(const MCOperand&)>;
|
||||
|
||||
/// Compute the hash value of the function based on its contents.
|
||||
///
|
||||
/// If \p UseDFS is set, then process blocks in DFS order that we recompute.
|
||||
/// Otherwise use the existing layout order.
|
||||
std::size_t hash(bool Recompute = true, bool UseDFS = false) const;
|
||||
/// If \p UseDFS is set, process basic blocks in DFS order. Otherwise, use
|
||||
/// the existing layout order.
|
||||
///
|
||||
/// By default, instruction operands are ignored while calculating the hash.
|
||||
/// The caller can change this via passing \p OperandHashFunc function.
|
||||
/// The return result of this function will be mixed with internal hash.
|
||||
size_t computeHash(bool UseDFS = false,
|
||||
OperandHashFuncTy OperandHashFunc =
|
||||
[](const MCOperand&) { return std::string(); }) const;
|
||||
|
||||
/// Sets the associated .debug_info entry.
|
||||
void addSubprogramDIE(const DWARFDie DIE) {
|
||||
|
|
|
@ -165,7 +165,7 @@ class RewriteInstanceDiff {
|
|||
// Structures for our 3 matching strategies: by name, by hash and by lto name,
|
||||
// from the strongest to the weakest bind between two functions
|
||||
StringMap<const BinaryFunction *> NameLookup;
|
||||
DenseMap<std::size_t, const BinaryFunction *> HashLookup;
|
||||
DenseMap<size_t, const BinaryFunction *> HashLookup;
|
||||
StringMap<const BinaryFunction *> LTONameLookup1;
|
||||
StringMap<const BinaryFunction *> LTONameLookup2;
|
||||
|
||||
|
@ -223,7 +223,7 @@ class RewriteInstanceDiff {
|
|||
NameLookup[Name] = &Function;
|
||||
}
|
||||
if (opts::MatchByHash && Function.hasCFG())
|
||||
HashLookup[Function.hash(true, true)] = &Function;
|
||||
HashLookup[Function.computeHash(/*UseDFS=*/true)] = &Function;
|
||||
if (opts::IgnoreLTOSuffix && !LTOName.empty()) {
|
||||
if (!LTONameLookup1.count(LTOName))
|
||||
LTONameLookup1[LTOName] = &Function;
|
||||
|
@ -277,7 +277,7 @@ class RewriteInstanceDiff {
|
|||
}
|
||||
if (Match || !Function2.hasCFG())
|
||||
continue;
|
||||
auto Iter = HashLookup.find(Function2.hash(true, true));
|
||||
auto Iter = HashLookup.find(Function2.computeHash(/*UseDFS*/true));
|
||||
if (Iter != HashLookup.end()) {
|
||||
FuncMap.insert(std::make_pair<>(&Function2, Iter->second));
|
||||
Bin1MappedFuncs.insert(Iter->second);
|
||||
|
@ -568,7 +568,8 @@ class RewriteInstanceDiff {
|
|||
for (auto I = LargestDiffs.rbegin(), E = LargestDiffs.rend(); I != E; ++I) {
|
||||
const auto &MapEntry = I->second;
|
||||
if (opts::IgnoreUnchanged &&
|
||||
MapEntry.second->hash(true, true) == MapEntry.first->hash(true, true))
|
||||
MapEntry.second->computeHash(/*UseDFS=*/true) ==
|
||||
MapEntry.first->computeHash(/*UseDFS=*/true))
|
||||
continue;
|
||||
const auto &Scores = ScoreMap[MapEntry.first];
|
||||
outs() << "Function " << MapEntry.first->getDemangledName();
|
||||
|
@ -580,8 +581,8 @@ class RewriteInstanceDiff {
|
|||
<< "%\t(Difference: ";
|
||||
printColoredPercentage((Scores.second - Scores.first) * 100.0);
|
||||
outs() << ")";
|
||||
if (MapEntry.second->hash(true, true) !=
|
||||
MapEntry.first->hash(true, true)) {
|
||||
if (MapEntry.second->computeHash(/*UseDFS=*/true) !=
|
||||
MapEntry.first->computeHash(/*UseDFS=*/true)) {
|
||||
outs() << "\t[Functions have different contents]";
|
||||
if (opts::PrintDiffCFG) {
|
||||
outs() << "\n *** CFG for function in binary 1:\n";
|
||||
|
|
|
@ -263,7 +263,7 @@ private:
|
|||
};
|
||||
|
||||
struct AbbrevHash {
|
||||
std::size_t operator()(const AbbrevAttrPatch &P) const {
|
||||
size_t operator()(const AbbrevAttrPatch &P) const {
|
||||
return std::hash<uint64_t>()(((uint64_t)P.Abbrev << 16) + P.Attr);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -30,7 +30,7 @@ extern cl::opt<unsigned> Verbosity;
|
|||
|
||||
JumpTable::JumpTable(MCSymbol &Symbol,
|
||||
uint64_t Address,
|
||||
std::size_t EntrySize,
|
||||
size_t EntrySize,
|
||||
JumpTableType Type,
|
||||
LabelMapType &&Labels,
|
||||
BinaryFunction &BF,
|
||||
|
|
|
@ -56,10 +56,10 @@ public:
|
|||
};
|
||||
|
||||
/// Size of the entry used for storage.
|
||||
std::size_t EntrySize;
|
||||
size_t EntrySize;
|
||||
|
||||
/// Size of the entry size we will write (we may use a more compact layout)
|
||||
std::size_t OutputEntrySize;
|
||||
size_t OutputEntrySize;
|
||||
|
||||
/// The type of this jump table.
|
||||
JumpTableType Type;
|
||||
|
@ -91,7 +91,7 @@ private:
|
|||
/// Constructor should only be called by a BinaryContext.
|
||||
JumpTable(MCSymbol &Symbol,
|
||||
uint64_t Address,
|
||||
std::size_t EntrySize,
|
||||
size_t EntrySize,
|
||||
JumpTableType Type,
|
||||
LabelMapType &&Labels,
|
||||
BinaryFunction &BF,
|
||||
|
|
|
@ -147,8 +147,8 @@ bool isInstrEquivalentWith(const MCInst &InstA, const BinaryBasicBlock &BBA,
|
|||
/// Returns true if this function has identical code and CFG with
|
||||
/// the given function \p BF.
|
||||
///
|
||||
/// If \p IgnoreSymbols is set to true, then symbolic operands are ignored
|
||||
/// during comparison.
|
||||
/// If \p IgnoreSymbols is set to true, then symbolic operands
|
||||
/// that reference functions are ignored during the comparison.
|
||||
///
|
||||
/// If \p UseDFS is set to true, then compute DFS of each function and use
|
||||
/// is for CFG equivalency. Potentially it will help to catch more cases,
|
||||
|
@ -204,7 +204,7 @@ bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B,
|
|||
if (IgnoreSymbols) {
|
||||
Identical =
|
||||
isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB,
|
||||
[](const MCSymbol *A, const MCSymbol *B) {
|
||||
[&](const MCSymbol *A, const MCSymbol *B) {
|
||||
return true;
|
||||
});
|
||||
} else {
|
||||
|
@ -266,9 +266,8 @@ bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B,
|
|||
return equalJumpTables(*JumpTableA, *JumpTableB, A, B);
|
||||
};
|
||||
|
||||
Identical =
|
||||
isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB,
|
||||
AreSymbolsIdentical);
|
||||
Identical = isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB,
|
||||
AreSymbolsIdentical);
|
||||
}
|
||||
|
||||
if (!Identical) {
|
||||
|
@ -302,8 +301,8 @@ bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B,
|
|||
// This hash table is used to identify identical functions. It maps
|
||||
// a function to a bucket of functions identical to it.
|
||||
struct KeyHash {
|
||||
std::size_t operator()(const BinaryFunction *F) const {
|
||||
return F->hash(/*Recompute=*/false);
|
||||
size_t operator()(const BinaryFunction *F) const {
|
||||
return F->getHash();
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -331,6 +330,74 @@ typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,
|
|||
KeyHash, KeyEqual>
|
||||
IdenticalBucketsMap;
|
||||
|
||||
/// Encode \p Value as a minimal little-endian byte string for hashing.
/// A value of zero is encoded as a single zero byte, so the result is
/// never empty and distinct values never collide with the empty string.
std::string hashInteger(uint64_t Value) {
  std::string Result;
  do {
    Result.push_back(static_cast<uint8_t>(Value & 0xff));
    Value >>= 8;
  } while (Value);
  return Result;
}
|
||||
|
||||
/// Produce a hash string for \p Symbol, or an empty string when the symbol
/// should not contribute to the function hash.
///
/// Function references and jump-table references are deliberately skipped:
/// two congruent functions may reference different function symbols or
/// jump-table addresses while still being foldable by ICF.
std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
  // Skip references to other functions.
  if (BC.getFunctionForSymbol(&Symbol))
    return std::string();

  const auto ErrorOrValue = BC.getSymbolValue(Symbol);
  if (!ErrorOrValue)
    return std::string();

  // Skip references into jump tables.
  if (BC.getJumpTableContainingAddress(*ErrorOrValue))
    return std::string();

  return hashInteger(*ErrorOrValue);
}
|
||||
|
||||
/// Recursively hash the MC expression \p Expr into a byte string.
///
/// Constants hash their value, symbol references defer to hashSymbol(),
/// and unary/binary expressions mix the operator opcode with the hashes
/// of their sub-expressions. Target-specific expressions are ignored.
std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
  switch (Expr.getKind()) {
  case MCExpr::Constant: {
    const auto &Constant = cast<MCConstantExpr>(Expr);
    return hashInteger(Constant.getValue());
  }
  case MCExpr::SymbolRef: {
    const auto &SymbolRef = cast<MCSymbolRefExpr>(Expr);
    return hashSymbol(BC, SymbolRef.getSymbol());
  }
  case MCExpr::Unary: {
    const auto &Unary = cast<MCUnaryExpr>(Expr);
    std::string Result = hashInteger(Unary.getOpcode());
    Result.append(hashExpr(BC, *Unary.getSubExpr()));
    return Result;
  }
  case MCExpr::Binary: {
    const auto &Binary = cast<MCBinaryExpr>(Expr);
    std::string Result = hashExpr(BC, *Binary.getLHS());
    Result.append(hashInteger(Binary.getOpcode()));
    Result.append(hashExpr(BC, *Binary.getRHS()));
    return Result;
  }
  case MCExpr::Target:
    return std::string();
  }

  llvm_unreachable("invalid expression kind");
}
|
||||
|
||||
/// Hash a single instruction operand into a byte string.
///
/// Immediates and registers hash their numeric value; expression operands
/// are hashed recursively via hashExpr(). Any other operand kind yields an
/// empty string and thus does not affect the function hash.
std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) {
  if (Operand.isImm())
    return hashInteger(Operand.getImm());
  if (Operand.isReg())
    return hashInteger(Operand.getReg());
  if (Operand.isExpr())
    return hashExpr(BC, *Operand.getExpr());

  return std::string();
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace llvm {
|
||||
|
@ -354,7 +421,11 @@ void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
|
|||
BF.updateLayoutIndices();
|
||||
|
||||
// Pre-compute hash before pushing into hashtable.
|
||||
BF.hash(/*Recompute=*/true, opts::UseDFS);
|
||||
// Hash instruction operands to minimize hash collisions.
|
||||
BF.computeHash(opts::UseDFS,
|
||||
[&BC] (const MCOperand &Op) {
|
||||
return hashInstOperand(BC, Op);
|
||||
});
|
||||
};
|
||||
|
||||
ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
|
||||
|
@ -394,7 +465,7 @@ void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
|
|||
ThPool = &ParallelUtilities::getThreadPool();
|
||||
|
||||
// Fold identical functions within a single congruent bucket
|
||||
auto procesSingleBucket = [&](std::set<BinaryFunction *> &Candidates) {
|
||||
auto processSingleBucket = [&](std::set<BinaryFunction *> &Candidates) {
|
||||
Timer T("folding single congruent list", "folding single congruent list");
|
||||
DEBUG(T.startTimer());
|
||||
|
||||
|
@ -453,9 +524,9 @@ void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
|
|||
continue;
|
||||
|
||||
if (opts::NoThreads)
|
||||
procesSingleBucket(Bucket);
|
||||
processSingleBucket(Bucket);
|
||||
else
|
||||
ThPool->async(procesSingleBucket, std::ref(Bucket));
|
||||
ThPool->async(processSingleBucket, std::ref(Bucket));
|
||||
}
|
||||
|
||||
if (!opts::NoThreads)
|
||||
|
|
|
@ -72,7 +72,7 @@ ProfileReader::parseFunctionProfile(BinaryFunction &BF,
|
|||
|
||||
BF.setExecutionCount(YamlBF.ExecCount);
|
||||
|
||||
if (!opts::IgnoreHash && YamlBF.Hash != BF.hash(true, true)) {
|
||||
if (!opts::IgnoreHash && YamlBF.Hash != BF.computeHash(/*UseDFS=*/true)) {
|
||||
if (opts::Verbosity >= 1)
|
||||
errs() << "BOLT-WARNING: function hash mismatch\n";
|
||||
ProfileMatched = false;
|
||||
|
@ -269,7 +269,7 @@ ProfileReader::readProfile(const std::string &FileName,
|
|||
if (opts::IgnoreHash && Profile.NumBasicBlocks == BF.size())
|
||||
return true;
|
||||
if (!opts::IgnoreHash &&
|
||||
Profile.Hash == static_cast<uint64_t>(BF.hash(/*Recompute = */false)))
|
||||
Profile.Hash == static_cast<uint64_t>(BF.getHash()))
|
||||
return true;
|
||||
return false;
|
||||
};
|
||||
|
@ -282,7 +282,7 @@ ProfileReader::readProfile(const std::string &FileName,
|
|||
|
||||
// Recompute hash once per function.
|
||||
if (!opts::IgnoreHash)
|
||||
Function.hash(/*Recompute = */true, true);
|
||||
Function.computeHash(/*UseDFS=*/true);
|
||||
|
||||
for (auto FunctionName : Function.getNames()) {
|
||||
auto PI = ProfileNameToProfile.find(FunctionName);
|
||||
|
|
|
@ -36,7 +36,7 @@ convert(const BinaryFunction &BF, yaml::bolt::BinaryFunctionProfile &YamlBF) {
|
|||
|
||||
YamlBF.Name = BF.getPrintName();
|
||||
YamlBF.Id = BF.getFunctionNumber();
|
||||
YamlBF.Hash = BF.hash(true, true);
|
||||
YamlBF.Hash = BF.computeHash(/*UseDFS=*/true);
|
||||
YamlBF.NumBasicBlocks = BF.size();
|
||||
YamlBF.ExecCount = BF.getKnownExecutionCount();
|
||||
|
||||
|
|
|
@ -849,7 +849,7 @@ void RewriteInstance::discoverFileObjects() {
|
|||
StringRef FileSymbolName;
|
||||
bool SeenFileName = false;
|
||||
struct SymbolRefHash {
|
||||
std::size_t operator()(SymbolRef const &S) const {
|
||||
size_t operator()(SymbolRef const &S) const {
|
||||
return std::hash<decltype(DataRefImpl::p)>{}(S.getRawDataRefImpl().p);
|
||||
}
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue