diff --git a/bolt/BinaryContext.cpp b/bolt/BinaryContext.cpp index 79f7338021cc..3430807502bd 100644 --- a/bolt/BinaryContext.cpp +++ b/bolt/BinaryContext.cpp @@ -104,6 +104,7 @@ void BinaryContext::foldFunction(BinaryFunction &ChildBF, ChildBF.Names.clear(); ChildBF.Names.push_back(NewName); ChildBF.OutputSymbol = Ctx->getOrCreateSymbol(NewName); + ChildBF.setFolded(); } } diff --git a/bolt/BinaryFunction.cpp b/bolt/BinaryFunction.cpp index 574a1fb4be63..42a5bd96c664 100644 --- a/bolt/BinaryFunction.cpp +++ b/bolt/BinaryFunction.cpp @@ -3531,6 +3531,11 @@ DynoStats BinaryFunction::getDynoStats() const { if (!isSimple() || !hasValidProfile()) return Stats; + // If the function was folded in non-relocation mode, we keep its profile + // for optimization. However, it should be excluded from the dyno stats. + if (isFolded()) + return Stats; + // Update enumeration of basic blocks for correct detection of branch' // direction. updateLayoutIndices(); diff --git a/bolt/BinaryFunction.h b/bolt/BinaryFunction.h index ed9cd8915ee5..9fad1bd5b9ad 100644 --- a/bolt/BinaryFunction.h +++ b/bolt/BinaryFunction.h @@ -253,6 +253,10 @@ private: /// True if the function has more than one entry point. bool IsMultiEntry{false}; + /// True if the function body was folded into another function. Used for + /// identical code folding (ICF) without relocations. + bool IsFolded{false}; + /// The address for the code for this function in codegen memory. uint64_t ImageAddress{0}; @@ -1019,6 +1023,10 @@ public: return IsMultiEntry; } + bool isFolded() const { + return IsFolded; + } + /// Return true if the function uses jump tables. 
bool hasJumpTables() const { return JumpTables.size(); @@ -1291,6 +1299,11 @@ public: return *this; } + BinaryFunction &setFolded(bool Folded = true) { + IsFolded = Folded; + return *this; + } + BinaryFunction &setPersonalityFunction(uint64_t Addr) { PersonalityFunction = BC.getOrCreateGlobalSymbol(Addr, "FUNCat"); return *this; diff --git a/bolt/BinaryPasses.cpp b/bolt/BinaryPasses.cpp index 7036094fd811..2b5ecb7f9e88 100644 --- a/bolt/BinaryPasses.cpp +++ b/bolt/BinaryPasses.cpp @@ -1330,6 +1330,7 @@ void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC, uint64_t NumFunctionsFolded = 0; uint64_t NumJTFunctionsFolded = 0; uint64_t BytesSavedEstimate = 0; + uint64_t CallsSavedEstimate = 0; static bool UseDFS = opts::UseDFSForICF; // This hash table is used to identify identical functions. It maps @@ -1356,7 +1357,7 @@ void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC, KeyHash, KeyCongruent> CongruentBuckets; for (auto &BFI : BFs) { auto &BF = BFI.second; - if (!shouldOptimize(BF)) + if (!shouldOptimize(BF) || BF.isFolded()) continue; // Make sure indices are in-order. @@ -1414,9 +1415,11 @@ void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC, Candidates.erase(FI); // Fold the function and remove from the list of processed functions. + BytesSavedEstimate += ChildBF->getSize(); + CallsSavedEstimate += std::min(ChildBF->getKnownExecutionCount(), + ParentBF->getKnownExecutionCount()); BC.foldFunction(*ChildBF, *ParentBF, BFs); - BytesSavedEstimate += ChildBF->getSize(); ++NumFoldedLastIteration; if (ParentBF->hasJumpTables()) @@ -1456,7 +1459,8 @@ void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC, << NumJTFunctionsFolded << " functions had jump tables.\n" << "BOLT-INFO: Removing all identical functions will save " << format("%.2lf", (double) BytesSavedEstimate / 1024) - << " KB of code space.\n"; + << " KB of code space. Folded functions were called " + << CallsSavedEstimate << " times based on profile.\n"; } }