//===- bolt/Passes/IdenticalCodeFolding.cpp -------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the IdenticalCodeFolding class.
//
//===----------------------------------------------------------------------===//
|
|
|
|
|
2021-10-09 02:47:10 +08:00
|
|
|
#include "bolt/Passes/IdenticalCodeFolding.h"
|
|
|
|
#include "bolt/Core/ParallelUtilities.h"
|
2020-12-02 08:29:39 +08:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
2019-06-01 07:45:31 +08:00
|
|
|
#include "llvm/Support/ThreadPool.h"
|
|
|
|
#include "llvm/Support/Timer.h"
|
|
|
|
#include <atomic>
|
2018-05-23 06:52:21 +08:00
|
|
|
#include <map>
|
|
|
|
#include <set>
|
|
|
|
#include <unordered_map>
|
|
|
|
|
|
|
|
#define DEBUG_TYPE "bolt-icf"
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
using namespace bolt;
|
|
|
|
|
|
|
|
namespace opts {
|
|
|
|
|
|
|
|
extern cl::OptionCategory BoltOptCategory;
|
|
|
|
|
|
|
|
static cl::opt<bool>
|
|
|
|
UseDFS("icf-dfs",
|
|
|
|
cl::desc("use DFS ordering when using -icf option"),
|
|
|
|
cl::ReallyHidden,
|
|
|
|
cl::ZeroOrMore,
|
|
|
|
cl::cat(BoltOptCategory));
|
2019-07-12 22:25:50 +08:00
|
|
|
|
2019-06-01 07:45:31 +08:00
|
|
|
static cl::opt<bool>
|
|
|
|
TimeICF("time-icf",
|
|
|
|
cl::desc("time icf steps"),
|
|
|
|
cl::ReallyHidden,
|
|
|
|
cl::ZeroOrMore,
|
|
|
|
cl::cat(BoltOptCategory));
|
2018-05-23 06:52:21 +08:00
|
|
|
} // namespace opts
|
|
|
|
|
|
|
|
namespace {
|
2021-06-03 13:46:57 +08:00
|
|
|
// Make the unqualified name JumpTable refer to bolt::JumpTable inside this
// anonymous namespace.
// NOTE(review): presumably needed because both `llvm` and `bolt` are pulled in
// with using-directives above and another JumpTable could be visible — confirm.
using JumpTable = bolt::JumpTable;
|
2018-05-23 06:52:21 +08:00
|
|
|
|
|
|
|
/// Compare two jump tables in 2 functions. The function relies on consistent
|
|
|
|
/// ordering of basic blocks in both binary functions (e.g. DFS).
|
2021-12-15 08:52:51 +08:00
|
|
|
bool equalJumpTables(const JumpTable &JumpTableA, const JumpTable &JumpTableB,
|
2018-05-23 06:52:21 +08:00
|
|
|
const BinaryFunction &FunctionA,
|
|
|
|
const BinaryFunction &FunctionB) {
|
|
|
|
if (JumpTableA.EntrySize != JumpTableB.EntrySize)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (JumpTableA.Type != JumpTableB.Type)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (JumpTableA.getSize() != JumpTableB.getSize())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
for (uint64_t Index = 0; Index < JumpTableA.Entries.size(); ++Index) {
|
2021-04-08 15:19:26 +08:00
|
|
|
const MCSymbol *LabelA = JumpTableA.Entries[Index];
|
|
|
|
const MCSymbol *LabelB = JumpTableB.Entries[Index];
|
2018-05-23 06:52:21 +08:00
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
const BinaryBasicBlock *TargetA = FunctionA.getBasicBlockForLabel(LabelA);
|
|
|
|
const BinaryBasicBlock *TargetB = FunctionB.getBasicBlockForLabel(LabelB);
|
2018-05-23 06:52:21 +08:00
|
|
|
|
|
|
|
if (!TargetA || !TargetB) {
|
|
|
|
assert((TargetA || LabelA == FunctionA.getFunctionEndLabel()) &&
|
|
|
|
"no target basic block found");
|
|
|
|
assert((TargetB || LabelB == FunctionB.getFunctionEndLabel()) &&
|
|
|
|
"no target basic block found");
|
|
|
|
|
|
|
|
if (TargetA != TargetB)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(TargetA && TargetB && "cannot locate target block(s)");
|
|
|
|
|
|
|
|
if (TargetA->getLayoutIndex() != TargetB->getLayoutIndex())
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Helper function that compares an instruction of this function to the
|
|
|
|
/// given instruction of the given function. The functions should have
|
|
|
|
/// identical CFG.
|
|
|
|
template <class Compare>
|
|
|
|
bool isInstrEquivalentWith(const MCInst &InstA, const BinaryBasicBlock &BBA,
|
|
|
|
const MCInst &InstB, const BinaryBasicBlock &BBB,
|
|
|
|
Compare Comp) {
|
|
|
|
if (InstA.getOpcode() != InstB.getOpcode()) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
const BinaryContext &BC = BBA.getFunction()->getBinaryContext();
|
2018-05-23 06:52:21 +08:00
|
|
|
|
|
|
|
// In this function we check for special conditions:
|
|
|
|
//
|
|
|
|
// * instructions with landing pads
|
|
|
|
//
|
|
|
|
// Most of the common cases should be handled by MCPlus::equals()
|
|
|
|
// that compares regular instruction operands.
|
|
|
|
//
|
|
|
|
// NB: there's no need to compare jump table indirect jump instructions
|
|
|
|
// separately as jump tables are handled by comparing corresponding
|
|
|
|
// symbols.
|
2021-04-08 15:19:26 +08:00
|
|
|
const Optional<MCPlus::MCLandingPad> EHInfoA = BC.MIB->getEHInfo(InstA);
|
|
|
|
const Optional<MCPlus::MCLandingPad> EHInfoB = BC.MIB->getEHInfo(InstB);
|
2018-05-23 06:52:21 +08:00
|
|
|
|
|
|
|
if (EHInfoA || EHInfoB) {
|
|
|
|
if (!EHInfoA && (EHInfoB->first || EHInfoB->second))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (!EHInfoB && (EHInfoA->first || EHInfoA->second))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (EHInfoA && EHInfoB) {
|
|
|
|
// Action indices should match.
|
|
|
|
if (EHInfoA->second != EHInfoB->second)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (!EHInfoA->first != !EHInfoB->first)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (EHInfoA->first && EHInfoB->first) {
|
2021-04-08 15:19:26 +08:00
|
|
|
const BinaryBasicBlock *LPA = BBA.getLandingPad(EHInfoA->first);
|
|
|
|
const BinaryBasicBlock *LPB = BBB.getLandingPad(EHInfoB->first);
|
2018-05-23 06:52:21 +08:00
|
|
|
assert(LPA && LPB && "cannot locate landing pad(s)");
|
|
|
|
|
|
|
|
if (LPA->getLayoutIndex() != LPB->getLayoutIndex())
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return BC.MIB->equals(InstA, InstB, Comp);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns true if this function has identical code and CFG with
|
|
|
|
/// the given function \p BF.
|
|
|
|
///
|
2020-04-08 13:10:12 +08:00
|
|
|
/// If \p CongruentSymbols is set to true, then symbolic operands that reference
|
|
|
|
/// potentially identical but different functions are ignored during the
|
|
|
|
/// comparison.
|
2018-05-23 06:52:21 +08:00
|
|
|
bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B,
|
2020-04-08 13:10:12 +08:00
|
|
|
bool CongruentSymbols) {
|
2018-05-23 06:52:21 +08:00
|
|
|
assert(A.hasCFG() && B.hasCFG() && "both functions should have CFG");
|
|
|
|
|
|
|
|
// Compare the two functions, one basic block at a time.
|
|
|
|
// Currently we require two identical basic blocks to have identical
|
|
|
|
// instruction sequences and the same index in their corresponding
|
|
|
|
// functions. The latter is important for CFG equality.
|
|
|
|
|
|
|
|
if (A.layout_size() != B.layout_size())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Comparing multi-entry functions could be non-trivial.
|
|
|
|
if (A.isMultiEntry() || B.isMultiEntry())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Process both functions in either DFS or existing order.
|
2021-12-09 14:59:09 +08:00
|
|
|
const BinaryFunction::BasicBlockOrderType &OrderA =
|
2021-04-08 15:19:26 +08:00
|
|
|
opts::UseDFS ? A.dfs() : A.getLayout();
|
2021-12-09 14:59:09 +08:00
|
|
|
const BinaryFunction::BasicBlockOrderType &OrderB =
|
2021-04-08 15:19:26 +08:00
|
|
|
opts::UseDFS ? B.dfs() : B.getLayout();
|
2018-05-23 06:52:21 +08:00
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
const BinaryContext &BC = A.getBinaryContext();
|
2018-05-23 06:52:21 +08:00
|
|
|
|
|
|
|
auto BBI = OrderB.begin();
|
2021-04-08 15:19:26 +08:00
|
|
|
for (const BinaryBasicBlock *BB : OrderA) {
|
|
|
|
const BinaryBasicBlock *OtherBB = *BBI;
|
2018-05-23 06:52:21 +08:00
|
|
|
|
|
|
|
if (BB->getLayoutIndex() != OtherBB->getLayoutIndex())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Compare successor basic blocks.
|
|
|
|
// NOTE: the comparison for jump tables is only partially verified here.
|
|
|
|
if (BB->succ_size() != OtherBB->succ_size())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
auto SuccBBI = OtherBB->succ_begin();
|
2021-04-08 15:19:26 +08:00
|
|
|
for (const BinaryBasicBlock *SuccBB : BB->successors()) {
|
|
|
|
const BinaryBasicBlock *SuccOtherBB = *SuccBBI;
|
2018-05-23 06:52:21 +08:00
|
|
|
if (SuccBB->getLayoutIndex() != SuccOtherBB->getLayoutIndex())
|
|
|
|
return false;
|
|
|
|
++SuccBBI;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Compare all instructions including pseudos.
|
|
|
|
auto I = BB->begin(), E = BB->end();
|
|
|
|
auto OtherI = OtherBB->begin(), OtherE = OtherBB->end();
|
|
|
|
while (I != E && OtherI != OtherE) {
|
2020-04-08 13:10:12 +08:00
|
|
|
// Compare symbols.
|
2021-12-15 08:52:51 +08:00
|
|
|
auto AreSymbolsIdentical = [&](const MCSymbol *SymbolA,
|
|
|
|
const MCSymbol *SymbolB) {
|
2020-04-08 13:10:12 +08:00
|
|
|
if (SymbolA == SymbolB)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
// All local symbols are considered identical since they affect a
|
|
|
|
// control flow and we check the control flow separately.
|
|
|
|
// If a local symbol is escaped, then the function (potentially) has
|
|
|
|
// multiple entry points and we exclude such functions from
|
|
|
|
// comparison.
|
|
|
|
if (SymbolA->isTemporary() && SymbolB->isTemporary())
|
|
|
|
return true;
|
|
|
|
|
|
|
|
// Compare symbols as functions.
|
2021-05-14 01:50:47 +08:00
|
|
|
uint64_t EntryIDA = 0;
|
|
|
|
uint64_t EntryIDB = 0;
|
2021-04-08 15:19:26 +08:00
|
|
|
const BinaryFunction *FunctionA =
|
|
|
|
BC.getFunctionForSymbol(SymbolA, &EntryIDA);
|
|
|
|
const BinaryFunction *FunctionB =
|
|
|
|
BC.getFunctionForSymbol(SymbolB, &EntryIDB);
|
2020-04-08 13:10:12 +08:00
|
|
|
if (FunctionA && EntryIDA)
|
|
|
|
FunctionA = nullptr;
|
|
|
|
if (FunctionB && EntryIDB)
|
|
|
|
FunctionB = nullptr;
|
|
|
|
if (FunctionA && FunctionB) {
|
|
|
|
// Self-referencing functions and recursive calls.
|
|
|
|
if (FunctionA == &A && FunctionB == &B)
|
2018-05-23 06:52:21 +08:00
|
|
|
return true;
|
|
|
|
|
2020-04-08 13:10:12 +08:00
|
|
|
// Functions with different hash values can never become identical,
|
|
|
|
// hence A and B are different.
|
|
|
|
if (CongruentSymbols)
|
|
|
|
return FunctionA->getHash() == FunctionB->getHash();
|
2018-05-23 06:52:21 +08:00
|
|
|
|
2020-04-08 13:10:12 +08:00
|
|
|
return FunctionA == FunctionB;
|
|
|
|
}
|
|
|
|
|
|
|
|
// One of the symbols represents a function, the other one does not.
|
|
|
|
if (FunctionA != FunctionB) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check if symbols are jump tables.
|
2021-04-08 15:19:26 +08:00
|
|
|
const BinaryData *SIA = BC.getBinaryDataByName(SymbolA->getName());
|
2020-04-08 13:10:12 +08:00
|
|
|
if (!SIA)
|
|
|
|
return false;
|
2021-04-08 15:19:26 +08:00
|
|
|
const BinaryData *SIB = BC.getBinaryDataByName(SymbolB->getName());
|
2020-04-08 13:10:12 +08:00
|
|
|
if (!SIB)
|
|
|
|
return false;
|
2018-05-23 06:52:21 +08:00
|
|
|
|
2020-04-08 13:10:12 +08:00
|
|
|
assert((SIA->getAddress() != SIB->getAddress()) &&
|
|
|
|
"different symbols should not have the same value");
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
const JumpTable *JumpTableA =
|
|
|
|
A.getJumpTableContainingAddress(SIA->getAddress());
|
2020-04-08 13:10:12 +08:00
|
|
|
if (!JumpTableA)
|
|
|
|
return false;
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
const JumpTable *JumpTableB =
|
|
|
|
B.getJumpTableContainingAddress(SIB->getAddress());
|
2020-04-08 13:10:12 +08:00
|
|
|
if (!JumpTableB)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if ((SIA->getAddress() - JumpTableA->getAddress()) !=
|
|
|
|
(SIB->getAddress() - JumpTableB->getAddress()))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return equalJumpTables(*JumpTableA, *JumpTableB, A, B);
|
|
|
|
};
|
|
|
|
|
2021-12-15 08:52:51 +08:00
|
|
|
if (!isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB,
|
|
|
|
AreSymbolsIdentical)) {
|
2018-05-23 06:52:21 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2021-12-15 08:52:51 +08:00
|
|
|
++I;
|
|
|
|
++OtherI;
|
2018-05-23 06:52:21 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// One of the identical blocks may have a trailing unconditional jump that
|
|
|
|
// is ignored for CFG purposes.
|
2021-04-08 15:19:26 +08:00
|
|
|
const MCInst *TrailingInstr =
|
|
|
|
(I != E ? &(*I) : (OtherI != OtherE ? &(*OtherI) : 0));
|
2018-05-23 06:52:21 +08:00
|
|
|
if (TrailingInstr && !BC.MIB->isUnconditionalBranch(*TrailingInstr)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
++BBI;
|
|
|
|
}
|
|
|
|
|
2020-03-31 10:08:24 +08:00
|
|
|
// Compare exceptions action tables.
|
|
|
|
if (A.getLSDAActionTable() != B.getLSDAActionTable() ||
|
|
|
|
A.getLSDATypeTable() != B.getLSDATypeTable() ||
|
|
|
|
A.getLSDATypeIndexTable() != B.getLSDATypeIndexTable()) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2018-05-23 06:52:21 +08:00
|
|
|
return true;
|
|
|
|
}
|
2019-06-01 07:45:31 +08:00
|
|
|
|
|
|
|
// This hash table is used to identify identical functions. It maps
|
|
|
|
// a function to a bucket of functions identical to it.
|
|
|
|
struct KeyHash {
|
2021-12-15 08:52:51 +08:00
|
|
|
size_t operator()(const BinaryFunction *F) const { return F->getHash(); }
|
2019-06-01 07:45:31 +08:00
|
|
|
};
|
|
|
|
|
2020-04-08 13:10:12 +08:00
|
|
|
/// Identify two congruent functions. Two functions are considered congruent,
|
|
|
|
/// if they are identical/equal except for some of their instruction operands
|
|
|
|
/// that reference potentially identical functions, i.e. functions that could
|
|
|
|
/// be folded later. Congruent functions are candidates for folding in our
|
|
|
|
/// iterative ICF algorithm.
|
|
|
|
///
|
|
|
|
/// Congruent functions are required to have identical hash.
|
2019-06-01 07:45:31 +08:00
|
|
|
struct KeyCongruent {
|
|
|
|
bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
|
|
|
|
if (A == B)
|
|
|
|
return true;
|
2020-04-08 13:10:12 +08:00
|
|
|
return isIdenticalWith(*A, *B, /*CongruentSymbols=*/true);
|
2019-06-01 07:45:31 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
struct KeyEqual {
|
|
|
|
bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
|
|
|
|
if (A == B)
|
|
|
|
return true;
|
2020-04-08 13:10:12 +08:00
|
|
|
return isIdenticalWith(*A, *B, /*CongruentSymbols=*/false);
|
2019-06-01 07:45:31 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
typedef std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>,
|
|
|
|
KeyHash, KeyCongruent>
|
|
|
|
CongruentBucketsMap;
|
|
|
|
|
|
|
|
typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,
|
|
|
|
KeyHash, KeyEqual>
|
|
|
|
IdenticalBucketsMap;
|
|
|
|
|
2020-04-07 15:21:37 +08:00
|
|
|
/// Serialize \p Value into a byte string, least significant byte first,
/// stopping once the remaining high-order bytes are all zero. Zero itself is
/// encoded as a single zero byte, so the result is never empty.
std::string hashInteger(uint64_t Value) {
  std::string Bytes;

  // The do-while emits at least one byte, which covers Value == 0.
  do {
    Bytes.push_back(static_cast<uint8_t>(Value & 0xff));
    Value >>= 8;
  } while (Value);

  return Bytes;
}
|
|
|
|
|
|
|
|
/// Produce the hash contribution of \p Symbol. Symbols that reference a
/// function, have no known value, or point into a jump table contribute an
/// empty string; any other symbol contributes the encoding of its value.
std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
  // Ignore function references.
  if (BC.getFunctionForSymbol(&Symbol))
    return std::string();

  llvm::ErrorOr<uint64_t> SymbolValue = BC.getSymbolValue(Symbol);
  if (!SymbolValue)
    return std::string();

  // Ignore jump table references.
  if (BC.getJumpTableContainingAddress(*SymbolValue))
    return std::string();

  return hashInteger(*SymbolValue);
}
|
|
|
|
|
|
|
|
/// Produce the hash contribution of expression \p Expr by recursing over its
/// structure: constants and opcodes go through hashInteger(), symbol
/// references through hashSymbol(), and target-specific expressions contribute
/// nothing.
std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
  switch (Expr.getKind()) {
  case MCExpr::Constant: {
    const auto &Constant = cast<MCConstantExpr>(Expr);
    return hashInteger(Constant.getValue());
  }
  case MCExpr::SymbolRef: {
    const auto &SymbolRef = cast<MCSymbolRefExpr>(Expr);
    return hashSymbol(BC, SymbolRef.getSymbol());
  }
  case MCExpr::Unary: {
    // Opcode first, then the operand.
    const auto &Unary = cast<MCUnaryExpr>(Expr);
    std::string Hash = hashInteger(Unary.getOpcode());
    Hash.append(hashExpr(BC, *Unary.getSubExpr()));
    return Hash;
  }
  case MCExpr::Binary: {
    // Left operand, opcode, right operand.
    const auto &Binary = cast<MCBinaryExpr>(Expr);
    std::string Hash = hashExpr(BC, *Binary.getLHS());
    Hash.append(hashInteger(Binary.getOpcode()));
    Hash.append(hashExpr(BC, *Binary.getRHS()));
    return Hash;
  }
  case MCExpr::Target:
    return std::string();
  }

  llvm_unreachable("invalid expression kind");
}
|
|
|
|
|
|
|
|
/// Produce the hash contribution of a single instruction operand. Immediates
/// and registers are encoded with hashInteger(), expressions with hashExpr();
/// any other operand kind contributes an empty string.
std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) {
  if (Operand.isImm())
    return hashInteger(Operand.getImm());

  if (Operand.isReg())
    return hashInteger(Operand.getReg());

  if (Operand.isExpr())
    return hashExpr(BC, *Operand.getExpr());

  return std::string();
}
|
|
|
|
|
2019-06-01 07:45:31 +08:00
|
|
|
} // namespace
|
2018-05-23 06:52:21 +08:00
|
|
|
|
|
|
|
namespace llvm {
|
|
|
|
namespace bolt {
|
|
|
|
|
2019-04-04 06:52:01 +08:00
|
|
|
void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
|
2021-04-08 15:19:26 +08:00
|
|
|
const size_t OriginalFunctionCount = BC.getBinaryFunctions().size();
|
2021-05-14 01:50:47 +08:00
|
|
|
uint64_t NumFunctionsFolded = 0;
|
2019-06-01 07:45:31 +08:00
|
|
|
std::atomic<uint64_t> NumJTFunctionsFolded{0};
|
|
|
|
std::atomic<uint64_t> BytesSavedEstimate{0};
|
|
|
|
std::atomic<uint64_t> CallsSavedEstimate{0};
|
|
|
|
std::atomic<uint64_t> NumFoldedLastIteration{0};
|
|
|
|
CongruentBucketsMap CongruentBuckets;
|
|
|
|
|
|
|
|
// Hash all the functions
|
|
|
|
auto hashFunctions = [&]() {
|
|
|
|
NamedRegionTimer HashFunctionsTimer("hashing", "hashing", "ICF breakdown",
|
|
|
|
"ICF breakdown", opts::TimeICF);
|
2019-07-25 08:13:15 +08:00
|
|
|
ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
|
|
|
|
// Make sure indices are in-order.
|
|
|
|
BF.updateLayoutIndices();
|
2019-06-01 07:45:31 +08:00
|
|
|
|
2019-07-25 08:13:15 +08:00
|
|
|
// Pre-compute hash before pushing into hashtable.
|
2020-04-07 15:21:37 +08:00
|
|
|
// Hash instruction operands to minimize hash collisions.
|
2021-12-15 08:52:51 +08:00
|
|
|
BF.computeHash(opts::UseDFS, [&BC](const MCOperand &Op) {
|
|
|
|
return hashInstOperand(BC, Op);
|
|
|
|
});
|
2019-06-01 07:45:31 +08:00
|
|
|
};
|
2018-05-23 06:52:21 +08:00
|
|
|
|
2019-07-25 08:13:15 +08:00
|
|
|
ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
|
|
|
|
return !shouldOptimize(BF);
|
|
|
|
};
|
2018-05-23 06:52:21 +08:00
|
|
|
|
2019-07-25 08:13:15 +08:00
|
|
|
ParallelUtilities::runOnEachFunction(
|
|
|
|
BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, SkipFunc,
|
|
|
|
"hashFunctions", /*ForceSequential*/ false, 2);
|
2019-06-01 07:45:31 +08:00
|
|
|
};
|
2018-05-23 06:52:21 +08:00
|
|
|
|
2019-06-01 07:45:31 +08:00
|
|
|
// Creates buckets with congruent functions - functions that potentially
|
|
|
|
// could be folded.
|
|
|
|
auto createCongruentBuckets = [&]() {
|
|
|
|
NamedRegionTimer CongruentBucketsTimer("congruent buckets",
|
|
|
|
"congruent buckets", "ICF breakdown",
|
|
|
|
"ICF breakdown", opts::TimeICF);
|
|
|
|
for (auto &BFI : BC.getBinaryFunctions()) {
|
2021-04-08 15:19:26 +08:00
|
|
|
BinaryFunction &BF = BFI.second;
|
2019-06-29 00:21:27 +08:00
|
|
|
if (!this->shouldOptimize(BF))
|
2019-06-01 07:45:31 +08:00
|
|
|
continue;
|
|
|
|
CongruentBuckets[&BF].emplace(&BF);
|
|
|
|
}
|
|
|
|
};
|
2018-05-23 06:52:21 +08:00
|
|
|
|
2019-06-01 07:45:31 +08:00
|
|
|
// Partition each set of congruent functions into sets of identical functions
|
|
|
|
// and fold them
|
|
|
|
auto performFoldingPass = [&]() {
|
|
|
|
NamedRegionTimer FoldingPassesTimer("folding passes", "folding passes",
|
|
|
|
"ICF breakdown", "ICF breakdown",
|
|
|
|
opts::TimeICF);
|
|
|
|
Timer SinglePass("single fold pass", "single fold pass");
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(SinglePass.startTimer());
|
2018-05-23 06:52:21 +08:00
|
|
|
|
2019-07-25 08:13:15 +08:00
|
|
|
ThreadPool *ThPool;
|
|
|
|
if (!opts::NoThreads)
|
|
|
|
ThPool = &ParallelUtilities::getThreadPool();
|
|
|
|
|
|
|
|
// Fold identical functions within a single congruent bucket
|
2020-04-07 15:21:37 +08:00
|
|
|
auto processSingleBucket = [&](std::set<BinaryFunction *> &Candidates) {
|
2019-06-01 07:45:31 +08:00
|
|
|
Timer T("folding single congruent list", "folding single congruent list");
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(T.startTimer());
|
2018-05-23 06:52:21 +08:00
|
|
|
|
|
|
|
// Identical functions go into the same bucket.
|
2019-06-01 07:45:31 +08:00
|
|
|
IdenticalBucketsMap IdenticalBuckets;
|
2021-04-08 15:19:26 +08:00
|
|
|
for (BinaryFunction *BF : Candidates) {
|
2018-05-23 06:52:21 +08:00
|
|
|
IdenticalBuckets[BF].emplace_back(BF);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (auto &IBI : IdenticalBuckets) {
|
|
|
|
// Functions identified as identical.
|
2021-04-08 15:19:26 +08:00
|
|
|
std::vector<BinaryFunction *> &Twins = IBI.second;
|
2018-05-23 06:52:21 +08:00
|
|
|
if (Twins.size() < 2)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// Fold functions. Keep the order consistent across invocations with
|
|
|
|
// different options.
|
|
|
|
std::stable_sort(Twins.begin(), Twins.end(),
|
2019-06-01 07:45:31 +08:00
|
|
|
[](const BinaryFunction *A, const BinaryFunction *B) {
|
2019-07-25 08:13:15 +08:00
|
|
|
return A->getFunctionNumber() <
|
|
|
|
B->getFunctionNumber();
|
2019-06-01 07:45:31 +08:00
|
|
|
});
|
2018-05-23 06:52:21 +08:00
|
|
|
|
|
|
|
BinaryFunction *ParentBF = Twins[0];
|
2021-04-08 15:19:26 +08:00
|
|
|
for (unsigned I = 1; I < Twins.size(); ++I) {
|
|
|
|
BinaryFunction *ChildBF = Twins[I];
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: folding " << *ChildBF << " into "
|
|
|
|
<< *ParentBF << '\n');
|
2018-05-23 06:52:21 +08:00
|
|
|
|
|
|
|
// Remove child function from the list of candidates.
|
|
|
|
auto FI = Candidates.find(ChildBF);
|
|
|
|
assert(FI != Candidates.end() &&
|
|
|
|
"function expected to be in the set");
|
|
|
|
Candidates.erase(FI);
|
|
|
|
|
|
|
|
// Fold the function and remove from the list of processed functions.
|
|
|
|
BytesSavedEstimate += ChildBF->getSize();
|
|
|
|
CallsSavedEstimate += std::min(ChildBF->getKnownExecutionCount(),
|
|
|
|
ParentBF->getKnownExecutionCount());
|
2019-04-04 06:52:01 +08:00
|
|
|
BC.foldFunction(*ChildBF, *ParentBF);
|
2018-05-23 06:52:21 +08:00
|
|
|
|
|
|
|
++NumFoldedLastIteration;
|
|
|
|
|
|
|
|
if (ParentBF->hasJumpTables())
|
|
|
|
++NumJTFunctionsFolded;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(T.stopTimer());
|
2019-06-01 07:45:31 +08:00
|
|
|
};
|
|
|
|
|
2019-07-25 08:13:15 +08:00
|
|
|
// Create a task for each congruent bucket
|
2019-06-01 07:45:31 +08:00
|
|
|
for (auto &Entry : CongruentBuckets) {
|
2021-04-08 15:19:26 +08:00
|
|
|
std::set<BinaryFunction *> &Bucket = Entry.second;
|
2019-07-25 08:13:15 +08:00
|
|
|
if (Bucket.size() < 2)
|
2019-06-01 07:45:31 +08:00
|
|
|
continue;
|
2019-07-12 22:25:50 +08:00
|
|
|
|
2019-06-01 07:45:31 +08:00
|
|
|
if (opts::NoThreads)
|
2020-04-07 15:21:37 +08:00
|
|
|
processSingleBucket(Bucket);
|
2019-06-01 07:45:31 +08:00
|
|
|
else
|
2020-04-07 15:21:37 +08:00
|
|
|
ThPool->async(processSingleBucket, std::ref(Bucket));
|
2018-05-23 06:52:21 +08:00
|
|
|
}
|
2019-06-01 07:45:31 +08:00
|
|
|
|
2019-07-25 08:13:15 +08:00
|
|
|
if (!opts::NoThreads)
|
|
|
|
ThPool->wait();
|
|
|
|
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(SinglePass.stopTimer());
|
2019-06-01 07:45:31 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
hashFunctions();
|
|
|
|
createCongruentBuckets();
|
|
|
|
|
|
|
|
unsigned Iteration = 1;
|
|
|
|
// We repeat the pass until no new modifications happen.
|
|
|
|
do {
|
|
|
|
NumFoldedLastIteration = 0;
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ICF iteration " << Iteration << "...\n");
|
2019-06-01 07:45:31 +08:00
|
|
|
|
|
|
|
performFoldingPass();
|
|
|
|
|
2018-05-23 06:52:21 +08:00
|
|
|
NumFunctionsFolded += NumFoldedLastIteration;
|
|
|
|
++Iteration;
|
|
|
|
|
|
|
|
} while (NumFoldedLastIteration > 0);
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
LLVM_DEBUG({
|
2018-05-23 06:52:21 +08:00
|
|
|
// Print functions that are congruent but not identical.
|
|
|
|
for (auto &CBI : CongruentBuckets) {
|
2021-04-08 15:19:26 +08:00
|
|
|
std::set<BinaryFunction *> &Candidates = CBI.second;
|
2018-05-23 06:52:21 +08:00
|
|
|
if (Candidates.size() < 2)
|
|
|
|
continue;
|
|
|
|
dbgs() << "BOLT-DEBUG: the following " << Candidates.size()
|
|
|
|
<< " functions (each of size " << (*Candidates.begin())->getSize()
|
|
|
|
<< " bytes) are congruent but not identical:\n";
|
2021-04-08 15:19:26 +08:00
|
|
|
for (BinaryFunction *BF : Candidates) {
|
2018-05-23 06:52:21 +08:00
|
|
|
dbgs() << " " << *BF;
|
|
|
|
if (BF->getKnownExecutionCount()) {
|
|
|
|
dbgs() << " (executed " << BF->getKnownExecutionCount() << " times)";
|
|
|
|
}
|
|
|
|
dbgs() << '\n';
|
|
|
|
}
|
|
|
|
}
|
2021-04-08 15:19:26 +08:00
|
|
|
});
|
2018-05-23 06:52:21 +08:00
|
|
|
|
|
|
|
if (NumFunctionsFolded) {
|
2021-12-15 08:52:51 +08:00
|
|
|
outs() << "BOLT-INFO: ICF folded " << NumFunctionsFolded << " out of "
|
|
|
|
<< OriginalFunctionCount << " functions in " << Iteration
|
|
|
|
<< " passes. " << NumJTFunctionsFolded
|
|
|
|
<< " functions had jump tables.\n"
|
2018-05-23 06:52:21 +08:00
|
|
|
<< "BOLT-INFO: Removing all identical functions will save "
|
2021-12-15 08:52:51 +08:00
|
|
|
<< format("%.2lf", (double)BytesSavedEstimate / 1024)
|
2018-05-23 06:52:21 +08:00
|
|
|
<< " KB of code space. Folded functions were called "
|
|
|
|
<< CallsSavedEstimate << " times based on profile.\n";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace bolt
|
|
|
|
} // namespace llvm
|