From ee0371ad97c383bf676d6a04c1c0a6fae907c4da Mon Sep 17 00:00:00 2001 From: Maksim Panchenko Date: Tue, 7 Apr 2020 00:21:37 -0700 Subject: [PATCH] [BOLT] Speedup ICF by better function hashing Summary: Too many hash collisions may cause ICF to run slowly. We used to hash BinaryFunction only looking at instruction opcodes, ignoring instruction operands. With many almost identical functions, such approach may lead to long ICF processing time. By including operands into the hash, we reduce the number of collisions and improve the runtime often by a factor of 2 or more. (cherry picked from FBD20888957) --- bolt/src/BinaryFunction.cpp | 28 +++---- bolt/src/BinaryFunction.h | 28 +++++-- bolt/src/BoltDiff.cpp | 13 ++-- bolt/src/DebugData.h | 2 +- bolt/src/JumpTable.cpp | 2 +- bolt/src/JumpTable.h | 6 +- bolt/src/Passes/IdenticalCodeFolding.cpp | 95 +++++++++++++++++++++--- bolt/src/ProfileReader.cpp | 6 +- bolt/src/ProfileWriter.cpp | 2 +- bolt/src/RewriteInstance.cpp | 2 +- 10 files changed, 135 insertions(+), 49 deletions(-) diff --git a/bolt/src/BinaryFunction.cpp b/bolt/src/BinaryFunction.cpp index 625964f9afb4..7286792aa544 100644 --- a/bolt/src/BinaryFunction.cpp +++ b/bolt/src/BinaryFunction.cpp @@ -421,7 +421,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation, } } if (hasCFG()) { - OS << "\n Hash : " << Twine::utohexstr(hash()); + OS << "\n Hash : " << Twine::utohexstr(computeHash()); } if (FrameInstructions.size()) { OS << "\n CFI Instrs : " << FrameInstructions.size(); @@ -3197,20 +3197,18 @@ BinaryFunction::BasicBlockOrderType BinaryFunction::dfs() const { return DFS; } -std::size_t BinaryFunction::hash(bool Recompute, bool UseDFS) const { +size_t BinaryFunction::computeHash(bool UseDFS, + OperandHashFuncTy OperandHashFunc) const { if (size() == 0) return 0; assert(hasCFG() && "function is expected to have CFG"); - if (!Recompute) - return Hash; - const auto &Order = UseDFS ? dfs() : BasicBlocksLayout; - // The hash is computed by creating a string of all the opcodes - // in the function and hashing that string with std::hash. - std::string Opcodes; + // The hash is computed by creating a string of all instruction opcodes and + // possibly their operands and then hashing that string with std::hash. + std::string HashString; for (const auto *BB : Order) { for (const auto &Inst : *BB) { unsigned Opcode = Inst.getOpcode(); @@ -3224,20 +3222,22 @@ std::size_t BinaryFunction::hash(bool Recompute, bool UseDFS) const { if (BC.MIB->isUnconditionalBranch(Inst)) continue; - if (Opcode == 0) { - Opcodes.push_back(0); - continue; - } + if (Opcode == 0) + HashString.push_back(0); while (Opcode) { uint8_t LSB = Opcode & 0xff; - Opcodes.push_back(LSB); + HashString.push_back(LSB); Opcode = Opcode >> 8; } + + for (int I = 0, E = MCPlus::getNumPrimeOperands(Inst); I != E; ++I) { + HashString.append(OperandHashFunc(Inst.getOperand(I))); + } } } - return Hash = std::hash{}(Opcodes); + return Hash = std::hash{}(HashString); } void BinaryFunction::insertBasicBlocks( diff --git a/bolt/src/BinaryFunction.h b/bolt/src/BinaryFunction.h index a41457764a4e..a3d770fb77a2 100644 --- a/bolt/src/BinaryFunction.h +++ b/bolt/src/BinaryFunction.h @@ -38,6 +38,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include +#include #include #include #include @@ -349,7 +350,8 @@ private: /// is referenced by UnitLineTable. DWARFUnitLineTable UnitLineTable{nullptr, nullptr}; - /// Last computed hash value. + /// Last computed hash value. Note that the value could be recomputed using + /// different parameters by every pass. mutable uint64_t Hash{0}; /// For PLT functions it contains a symbol associated with a function @@ -2243,13 +2245,25 @@ public: /// Convert function-level branch data into instruction annotations. void convertBranchData(); - /// Returns a hash value for the function. To be used for ICF. Two congruent - /// functions (functions with different symbolic references but identical - /// otherwise) are required to have identical hashes. + /// Returns the last computed hash value of the function. + size_t getHash() const { + return Hash; + } + + using OperandHashFuncTy = + function_ref; + + /// Compute the hash value of the function based on its contents. /// - /// If \p UseDFS is set, then process blocks in DFS order that we recompute. - /// Otherwise use the existing layout order. - std::size_t hash(bool Recompute = true, bool UseDFS = false) const; + /// If \p UseDFS is set, process basic blocks in DFS order. Otherwise, use + /// the existing layout order. + /// + /// By default, instruction operands are ignored while calculating the hash. + /// The caller can change this via passing \p OperandHashFunc function. + /// The return result of this function will be mixed with internal hash. + size_t computeHash(bool UseDFS = false, + OperandHashFuncTy OperandHashFunc = + [](const MCOperand&) { return std::string(); }) const; /// Sets the associated .debug_info entry. void addSubprogramDIE(const DWARFDie DIE) { diff --git a/bolt/src/BoltDiff.cpp b/bolt/src/BoltDiff.cpp index 95b1f91916b6..f663fc8fe989 100644 --- a/bolt/src/BoltDiff.cpp +++ b/bolt/src/BoltDiff.cpp @@ -165,7 +165,7 @@ class RewriteInstanceDiff { // Structures for our 3 matching strategies: by name, by hash and by lto name, // from the strongest to the weakest bind between two functions StringMap NameLookup; - DenseMap HashLookup; + DenseMap HashLookup; StringMap LTONameLookup1; StringMap LTONameLookup2; @@ -223,7 +223,7 @@ class RewriteInstanceDiff { NameLookup[Name] = &Function; } if (opts::MatchByHash && Function.hasCFG()) - HashLookup[Function.hash(true, true)] = &Function; + HashLookup[Function.computeHash(/*UseDFS=*/true)] = &Function; if (opts::IgnoreLTOSuffix && !LTOName.empty()) { if (!LTONameLookup1.count(LTOName)) LTONameLookup1[LTOName] = &Function; @@ -277,7 +277,7 @@ class RewriteInstanceDiff { } if (Match || !Function2.hasCFG()) continue; - auto Iter = HashLookup.find(Function2.hash(true, true)); + auto Iter = HashLookup.find(Function2.computeHash(/*UseDFS*/true)); if (Iter != HashLookup.end()) { FuncMap.insert(std::make_pair<>(&Function2, Iter->second)); Bin1MappedFuncs.insert(Iter->second); @@ -568,7 +568,8 @@ class RewriteInstanceDiff { for (auto I = LargestDiffs.rbegin(), E = LargestDiffs.rend(); I != E; ++I) { const auto &MapEntry = I->second; if (opts::IgnoreUnchanged && - MapEntry.second->hash(true, true) == MapEntry.first->hash(true, true)) + MapEntry.second->computeHash(/*UseDFS=*/true) == + MapEntry.first->computeHash(/*UseDFS=*/true)) continue; const auto &Scores = ScoreMap[MapEntry.first]; outs() << "Function " << MapEntry.first->getDemangledName(); @@ -580,8 +581,8 @@ class RewriteInstanceDiff { << "%\t(Difference: "; printColoredPercentage((Scores.second - Scores.first) * 100.0); outs() << ")"; - if (MapEntry.second->hash(true, true) != - MapEntry.first->hash(true, true)) { + if (MapEntry.second->computeHash(/*UseDFS=*/true) != + MapEntry.first->computeHash(/*UseDFS=*/true)) { outs() << "\t[Functions have different contents]"; if (opts::PrintDiffCFG) { outs() << "\n *** CFG for function in binary 1:\n"; diff --git a/bolt/src/DebugData.h b/bolt/src/DebugData.h index 9261b43aed3e..0fe6dde99ea8 100644 --- a/bolt/src/DebugData.h +++ b/bolt/src/DebugData.h @@ -263,7 +263,7 @@ private: }; struct AbbrevHash { - std::size_t operator()(const AbbrevAttrPatch &P) const { + size_t operator()(const AbbrevAttrPatch &P) const { return std::hash()(((uint64_t)P.Abbrev << 16) + P.Attr); } }; diff --git a/bolt/src/JumpTable.cpp b/bolt/src/JumpTable.cpp index d1708466201e..6c997f222e2b 100644 --- a/bolt/src/JumpTable.cpp +++ b/bolt/src/JumpTable.cpp @@ -30,7 +30,7 @@ extern cl::opt Verbosity; JumpTable::JumpTable(MCSymbol &Symbol, uint64_t Address, - std::size_t EntrySize, + size_t EntrySize, JumpTableType Type, LabelMapType &&Labels, BinaryFunction &BF, diff --git a/bolt/src/JumpTable.h b/bolt/src/JumpTable.h index fdc55f32d32d..c8a4e38dff2e 100644 --- a/bolt/src/JumpTable.h +++ b/bolt/src/JumpTable.h @@ -56,10 +56,10 @@ public: }; /// Size of the entry used for storage. - std::size_t EntrySize; + size_t EntrySize; /// Size of the entry size we will write (we may use a more compact layout) - std::size_t OutputEntrySize; + size_t OutputEntrySize; /// The type of this jump table. JumpTableType Type; @@ -91,7 +91,7 @@ private: /// Constructor should only be called by a BinaryContext. JumpTable(MCSymbol &Symbol, uint64_t Address, - std::size_t EntrySize, + size_t EntrySize, JumpTableType Type, LabelMapType &&Labels, BinaryFunction &BF, diff --git a/bolt/src/Passes/IdenticalCodeFolding.cpp b/bolt/src/Passes/IdenticalCodeFolding.cpp index 34cea93538d3..3b90403a168f 100644 --- a/bolt/src/Passes/IdenticalCodeFolding.cpp +++ b/bolt/src/Passes/IdenticalCodeFolding.cpp @@ -147,8 +147,8 @@ bool isInstrEquivalentWith(const MCInst &InstA, const BinaryBasicBlock &BBA, /// Returns true if this function has identical code and CFG with /// the given function \p BF. /// -/// If \p IgnoreSymbols is set to true, then symbolic operands are ignored -/// during comparison. +/// If \p IgnoreSymbols is set to true, then symbolic operands +/// that reference functions are ignored during the comparison. /// /// If \p UseDFS is set to true, then compute DFS of each function and use /// is for CFG equivalency. Potentially it will help to catch more cases, @@ -204,7 +204,7 @@ bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B, if (IgnoreSymbols) { Identical = isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB, - [](const MCSymbol *A, const MCSymbol *B) { + [&](const MCSymbol *A, const MCSymbol *B) { return true; }); } else { @@ -266,9 +266,8 @@ bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B, return equalJumpTables(*JumpTableA, *JumpTableB, A, B); }; - Identical = - isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB, - AreSymbolsIdentical); + Identical = isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB, + AreSymbolsIdentical); } if (!Identical) { @@ -302,8 +301,8 @@ bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B, // This hash table is used to identify identical functions. It maps // a function to a bucket of functions identical to it. struct KeyHash { - std::size_t operator()(const BinaryFunction *F) const { - return F->hash(/*Recompute=*/false); + size_t operator()(const BinaryFunction *F) const { + return F->getHash(); } }; @@ -331,6 +330,74 @@ typedef std::unordered_map, KeyHash, KeyEqual> IdenticalBucketsMap; +std::string hashInteger(uint64_t Value) { + std::string HashString; + if (Value == 0) { + HashString.push_back(0); + } + while (Value) { + uint8_t LSB = Value & 0xff; + HashString.push_back(LSB); + Value >>= 8; + } + + return HashString; +} + +std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) { + std::string HashString; + + // Ignore function references. + if (BC.getFunctionForSymbol(&Symbol)) + return HashString; + + auto ErrorOrValue = BC.getSymbolValue(Symbol); + if (!ErrorOrValue) + return HashString; + + // Ignore jump tables references. + if (BC.getJumpTableContainingAddress(*ErrorOrValue)) + return HashString; + + return HashString.append(hashInteger(*ErrorOrValue)); +} + +std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) { + switch (Expr.getKind()) { + case MCExpr::Constant: + return hashInteger(cast(Expr).getValue()); + case MCExpr::SymbolRef: + return hashSymbol(BC, cast(Expr).getSymbol()); + case MCExpr::Unary: { + const auto &UnaryExpr = cast(Expr); + return hashInteger(UnaryExpr.getOpcode()) + .append(hashExpr(BC, *UnaryExpr.getSubExpr())); + } + case MCExpr::Binary: { + const auto &BinaryExpr = cast(Expr); + return hashExpr(BC, *BinaryExpr.getLHS()) + .append(hashInteger(BinaryExpr.getOpcode())) + .append(hashExpr(BC, *BinaryExpr.getRHS())); + } + case MCExpr::Target: + return std::string(); + } + + llvm_unreachable("invalid expression kind"); +} + +std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) { + if (Operand.isImm()) { + return hashInteger(Operand.getImm()); + } else if (Operand.isReg()) { + return hashInteger(Operand.getReg()); + } else if (Operand.isExpr()) { + return hashExpr(BC, *Operand.getExpr()); + } + + return std::string(); +} + } // namespace namespace llvm { @@ -354,7 +421,11 @@ void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { BF.updateLayoutIndices(); // Pre-compute hash before pushing into hashtable. - BF.hash(/*Recompute=*/true, opts::UseDFS); + // Hash instruction operands to minimize hash collisions. + BF.computeHash(opts::UseDFS, + [&BC] (const MCOperand &Op) { + return hashInstOperand(BC, Op); + }); }; ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { @@ -394,7 +465,7 @@ void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { ThPool = &ParallelUtilities::getThreadPool(); // Fold identical functions within a single congruent bucket - auto procesSingleBucket = [&](std::set &Candidates) { + auto processSingleBucket = [&](std::set &Candidates) { Timer T("folding single congruent list", "folding single congruent list"); DEBUG(T.startTimer()); @@ -453,9 +524,9 @@ void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { continue; if (opts::NoThreads) - procesSingleBucket(Bucket); + processSingleBucket(Bucket); else - ThPool->async(procesSingleBucket, std::ref(Bucket)); + ThPool->async(processSingleBucket, std::ref(Bucket)); } if (!opts::NoThreads) diff --git a/bolt/src/ProfileReader.cpp b/bolt/src/ProfileReader.cpp index 89bb567105ee..ba442dad6265 100644 --- a/bolt/src/ProfileReader.cpp +++ b/bolt/src/ProfileReader.cpp @@ -72,7 +72,7 @@ ProfileReader::parseFunctionProfile(BinaryFunction &BF, BF.setExecutionCount(YamlBF.ExecCount); - if (!opts::IgnoreHash && YamlBF.Hash != BF.hash(true, true)) { + if (!opts::IgnoreHash && YamlBF.Hash != BF.computeHash(/*UseDFS=*/true)) { if (opts::Verbosity >= 1) errs() << "BOLT-WARNING: function hash mismatch\n"; ProfileMatched = false; @@ -269,7 +269,7 @@ ProfileReader::readProfile(const std::string &FileName, if (opts::IgnoreHash && Profile.NumBasicBlocks == BF.size()) return true; if (!opts::IgnoreHash && - Profile.Hash == static_cast(BF.hash(/*Recompute = */false))) + Profile.Hash == static_cast(BF.getHash())) return true; return false; }; @@ -282,7 +282,7 @@ ProfileReader::readProfile(const std::string &FileName, // Recompute hash once per function. if (!opts::IgnoreHash) - Function.hash(/*Recompute = */true, true); + Function.computeHash(/*UseDFS=*/true); for (auto FunctionName : Function.getNames()) { auto PI = ProfileNameToProfile.find(FunctionName); diff --git a/bolt/src/ProfileWriter.cpp b/bolt/src/ProfileWriter.cpp index 54ba8171d039..29d9ea25aa25 100644 --- a/bolt/src/ProfileWriter.cpp +++ b/bolt/src/ProfileWriter.cpp @@ -36,7 +36,7 @@ convert(const BinaryFunction &BF, yaml::bolt::BinaryFunctionProfile &YamlBF) { YamlBF.Name = BF.getPrintName(); YamlBF.Id = BF.getFunctionNumber(); - YamlBF.Hash = BF.hash(true, true); + YamlBF.Hash = BF.computeHash(/*UseDFS=*/true); YamlBF.NumBasicBlocks = BF.size(); YamlBF.ExecCount = BF.getKnownExecutionCount(); diff --git a/bolt/src/RewriteInstance.cpp b/bolt/src/RewriteInstance.cpp index e1d6c3da7f1a..f4b39bf5e925 100644 --- a/bolt/src/RewriteInstance.cpp +++ b/bolt/src/RewriteInstance.cpp @@ -849,7 +849,7 @@ void RewriteInstance::discoverFileObjects() { StringRef FileSymbolName; bool SeenFileName = false; struct SymbolRefHash { - std::size_t operator()(SymbolRef const &S) const { + size_t operator()(SymbolRef const &S) const { return std::hash{}(S.getRawDataRefImpl().p); } };