forked from OSchip/llvm-project
Run reorder blocks in parallel
Summary: This diff changes the reorderBasicBlocks pass to run in parallel. It does so by adding locks to the fixBranches function and by creating temporary MCCodeEmitters when estimating basic block code size. (cherry picked from FBD16161149)
This commit is contained in:
parent
1169f1fdd8
commit
9977b03fea
|
@ -12,6 +12,7 @@
|
||||||
#include "BinaryBasicBlock.h"
|
#include "BinaryBasicBlock.h"
|
||||||
#include "BinaryContext.h"
|
#include "BinaryContext.h"
|
||||||
#include "BinaryFunction.h"
|
#include "BinaryFunction.h"
|
||||||
|
#include "ParallelUtilities.h"
|
||||||
#include "llvm/ADT/StringRef.h"
|
#include "llvm/ADT/StringRef.h"
|
||||||
#include "llvm/MC/MCAsmInfo.h"
|
#include "llvm/MC/MCAsmInfo.h"
|
||||||
#include "llvm/MC/MCContext.h"
|
#include "llvm/MC/MCContext.h"
|
||||||
|
@ -455,6 +456,7 @@ void BinaryBasicBlock::addBranchInstruction(const BinaryBasicBlock *Successor) {
|
||||||
assert(isSuccessor(Successor));
|
assert(isSuccessor(Successor));
|
||||||
auto &BC = Function->getBinaryContext();
|
auto &BC = Function->getBinaryContext();
|
||||||
MCInst NewInst;
|
MCInst NewInst;
|
||||||
|
std::unique_lock<std::shared_timed_mutex> Lock(BC.CtxMutex);
|
||||||
BC.MIB->createUncondBranch(NewInst, Successor->getLabel(), BC.Ctx.get());
|
BC.MIB->createUncondBranch(NewInst, Successor->getLabel(), BC.Ctx.get());
|
||||||
Instructions.emplace_back(std::move(NewInst));
|
Instructions.emplace_back(std::move(NewInst));
|
||||||
}
|
}
|
||||||
|
@ -537,8 +539,8 @@ void BinaryBasicBlock::dump() const {
|
||||||
outs() << "\n";
|
outs() << "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t BinaryBasicBlock::estimateSize() const {
|
uint64_t BinaryBasicBlock::estimateSize(const MCCodeEmitter *Emitter) const {
|
||||||
return Function->getBinaryContext().computeCodeSize(begin(), end());
|
return Function->getBinaryContext().computeCodeSize(begin(), end(), Emitter);
|
||||||
}
|
}
|
||||||
|
|
||||||
BinaryBasicBlock::BinaryBranchInfo &
|
BinaryBasicBlock::BinaryBranchInfo &
|
||||||
|
|
|
@ -16,14 +16,15 @@
|
||||||
|
|
||||||
#include "llvm/ADT/GraphTraits.h"
|
#include "llvm/ADT/GraphTraits.h"
|
||||||
#include "llvm/ADT/StringRef.h"
|
#include "llvm/ADT/StringRef.h"
|
||||||
|
#include "llvm/MC/MCCodeEmitter.h"
|
||||||
#include "llvm/MC/MCInst.h"
|
#include "llvm/MC/MCInst.h"
|
||||||
#include "llvm/MC/MCSymbol.h"
|
#include "llvm/MC/MCSymbol.h"
|
||||||
#include "llvm/Support/Debug.h"
|
#include "llvm/Support/Debug.h"
|
||||||
#include "llvm/Support/ErrorOr.h"
|
#include "llvm/Support/ErrorOr.h"
|
||||||
#include "llvm/Support/raw_ostream.h"
|
#include "llvm/Support/raw_ostream.h"
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <utility>
|
|
||||||
#include <set>
|
#include <set>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
namespace llvm {
|
namespace llvm {
|
||||||
|
|
||||||
|
@ -870,8 +871,11 @@ public:
|
||||||
return InputRange.second - InputRange.first;
|
return InputRange.second - InputRange.first;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns an estimate of size of basic block during run time.
|
/// Returns an estimate of the size of the basic block at run time, optionally
|
||||||
uint64_t estimateSize() const;
|
/// using a user-supplied emitter for lock-free multi-thread work.
|
||||||
|
/// MCCodeEmitter is not thread safe and each thread should operate with its
|
||||||
|
/// own copy of it.
|
||||||
|
uint64_t estimateSize(const MCCodeEmitter *Emitter = nullptr) const;
|
||||||
|
|
||||||
/// Return index in the current layout. The user is responsible for
|
/// Return index in the current layout. The user is responsible for
|
||||||
/// making sure the indices are up to date,
|
/// making sure the indices are up to date,
|
||||||
|
|
|
@ -1615,26 +1615,23 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF) {
|
||||||
BF.fixBranches();
|
BF.fixBranches();
|
||||||
|
|
||||||
// Create local MC context to isolate the effect of ephemeral code emission.
|
// Create local MC context to isolate the effect of ephemeral code emission.
|
||||||
std::unique_ptr<MCObjectFileInfo> LocalMOFI =
|
auto MCEInstance = createIndependentMCCodeEmitter();
|
||||||
llvm::make_unique<MCObjectFileInfo>();
|
auto *LocalCtx = MCEInstance.LocalCtx.get();
|
||||||
std::unique_ptr<MCContext> LocalCtx =
|
|
||||||
llvm::make_unique<MCContext>(AsmInfo.get(), MRI.get(), LocalMOFI.get());
|
|
||||||
LocalMOFI->InitMCObjectFileInfo(*TheTriple, /*PIC=*/false, *LocalCtx);
|
|
||||||
auto *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
|
auto *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
|
||||||
auto *MCE = TheTarget->createMCCodeEmitter(*MII, *MRI, *LocalCtx);
|
|
||||||
SmallString<256> Code;
|
SmallString<256> Code;
|
||||||
raw_svector_ostream VecOS(Code);
|
raw_svector_ostream VecOS(Code);
|
||||||
|
|
||||||
std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
|
std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
|
||||||
*TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), VecOS,
|
*TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), VecOS,
|
||||||
std::unique_ptr<MCCodeEmitter>(MCE), *STI,
|
std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
|
||||||
/* RelaxAll */ false,
|
/* RelaxAll */ false,
|
||||||
/* IncrementalLinkerCompatible */ false,
|
/* IncrementalLinkerCompatible */ false,
|
||||||
/* DWARFMustBeAtTheEnd */ false));
|
/* DWARFMustBeAtTheEnd */ false));
|
||||||
|
|
||||||
Streamer->InitSections(false);
|
Streamer->InitSections(false);
|
||||||
|
|
||||||
auto *Section = LocalMOFI->getTextSection();
|
auto *Section = MCEInstance.LocalMOFI->getTextSection();
|
||||||
Section->setHasInstructions(true);
|
Section->setHasInstructions(true);
|
||||||
|
|
||||||
auto *StartLabel = LocalCtx->getOrCreateSymbol("__hstart");
|
auto *StartLabel = LocalCtx->getOrCreateSymbol("__hstart");
|
||||||
|
|
|
@ -915,26 +915,33 @@ public:
|
||||||
/// size is for the cold one.
|
/// size is for the cold one.
|
||||||
std::pair<size_t, size_t> calculateEmittedSize(BinaryFunction &BF);
|
std::pair<size_t, size_t> calculateEmittedSize(BinaryFunction &BF);
|
||||||
|
|
||||||
/// Calculate the size of the instruction \p Inst.
|
/// Calculate the size of the instruction \p Inst optionally using a
|
||||||
uint64_t computeInstructionSize(const MCInst &Inst) const {
|
/// user-supplied emitter for lock-free multi-thread work. MCCodeEmitter is
|
||||||
|
/// not thread safe and each thread should operate with its own copy of it.
|
||||||
|
uint64_t
|
||||||
|
computeInstructionSize(const MCInst &Inst,
|
||||||
|
const MCCodeEmitter *Emitter = nullptr) const {
|
||||||
|
if (!Emitter)
|
||||||
|
Emitter = this->MCE.get();
|
||||||
SmallString<256> Code;
|
SmallString<256> Code;
|
||||||
SmallVector<MCFixup, 4> Fixups;
|
SmallVector<MCFixup, 4> Fixups;
|
||||||
raw_svector_ostream VecOS(Code);
|
raw_svector_ostream VecOS(Code);
|
||||||
MCE->encodeInstruction(Inst, VecOS, Fixups, *STI);
|
Emitter->encodeInstruction(Inst, VecOS, Fixups, *STI);
|
||||||
|
|
||||||
return Code.size();
|
return Code.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Compute the native code size for a range of instructions.
|
/// Compute the native code size for a range of instructions.
|
||||||
/// Note: this can be imprecise wrt the final binary since happening prior to
|
/// Note: this can be imprecise wrt the final binary since happening prior to
|
||||||
/// relaxation, as well as wrt the original binary because of opcode
|
/// relaxation, as well as wrt the original binary because of opcode
|
||||||
/// shortening.
|
/// shortening. MCCodeEmitter is not thread safe and each thread should operate
|
||||||
|
/// with its own copy of it.
|
||||||
template <typename Itr>
|
template <typename Itr>
|
||||||
uint64_t computeCodeSize(Itr Beg, Itr End) const {
|
uint64_t computeCodeSize(Itr Beg, Itr End,
|
||||||
|
const MCCodeEmitter *Emitter = nullptr) const {
|
||||||
uint64_t Size = 0;
|
uint64_t Size = 0;
|
||||||
while (Beg != End) {
|
while (Beg != End) {
|
||||||
if (!MII->get(Beg->getOpcode()).isPseudo())
|
if (!MII->get(Beg->getOpcode()).isPseudo())
|
||||||
Size += computeInstructionSize(*Beg);
|
Size += computeInstructionSize(*Beg, Emitter);
|
||||||
++Beg;
|
++Beg;
|
||||||
}
|
}
|
||||||
return Size;
|
return Size;
|
||||||
|
@ -999,6 +1006,30 @@ public:
|
||||||
|
|
||||||
void exitWithBugReport(StringRef Message,
|
void exitWithBugReport(StringRef Message,
|
||||||
const BinaryFunction &Function) const;
|
const BinaryFunction &Function) const;
|
||||||
|
|
||||||
|
struct IndependentCodeEmitter {
|
||||||
|
std::unique_ptr<MCObjectFileInfo> LocalMOFI;
|
||||||
|
std::unique_ptr<MCContext> LocalCtx;
|
||||||
|
std::unique_ptr<MCCodeEmitter> MCE;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Encapsulates an independent MCCodeEmitter that doesn't share resources
|
||||||
|
/// with the main one available through BinaryContext::MCE, managed by
|
||||||
|
/// BinaryContext.
|
||||||
|
/// This is intended to create a lock-free environment for an auxiliary thread
|
||||||
|
/// that needs to perform work with an MCCodeEmitter that can be transient or
|
||||||
|
/// won't be used in the main code emitter.
|
||||||
|
IndependentCodeEmitter createIndependentMCCodeEmitter() const {
|
||||||
|
IndependentCodeEmitter MCEInstance;
|
||||||
|
MCEInstance.LocalMOFI = llvm::make_unique<MCObjectFileInfo>();
|
||||||
|
MCEInstance.LocalCtx = llvm::make_unique<MCContext>(
|
||||||
|
AsmInfo.get(), MRI.get(), MCEInstance.LocalMOFI.get());
|
||||||
|
MCEInstance.LocalMOFI->InitMCObjectFileInfo(*TheTriple, /*PIC=*/false,
|
||||||
|
*MCEInstance.LocalCtx);
|
||||||
|
MCEInstance.MCE.reset(
|
||||||
|
TheTarget->createMCCodeEmitter(*MII, *MRI, *MCEInstance.LocalCtx));
|
||||||
|
return MCEInstance;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace bolt
|
} // namespace bolt
|
||||||
|
|
|
@ -2634,7 +2634,7 @@ void BinaryFunction::emitBody(MCStreamer &Streamer, bool EmitColdPart,
|
||||||
bool EmitCodeOnly, bool LabelsForOffsets) {
|
bool EmitCodeOnly, bool LabelsForOffsets) {
|
||||||
if (!EmitCodeOnly && EmitColdPart && hasConstantIsland())
|
if (!EmitCodeOnly && EmitColdPart && hasConstantIsland())
|
||||||
duplicateConstantIslands();
|
duplicateConstantIslands();
|
||||||
|
|
||||||
// Track first emitted instruction with debug info.
|
// Track first emitted instruction with debug info.
|
||||||
bool FirstInstr = true;
|
bool FirstInstr = true;
|
||||||
for (auto BB : layout()) {
|
for (auto BB : layout()) {
|
||||||
|
@ -2699,7 +2699,9 @@ void BinaryFunction::emitBody(MCStreamer &Streamer, bool EmitColdPart,
|
||||||
|
|
||||||
// Prepare to tag this location with a label if we need to keep track of
|
// Prepare to tag this location with a label if we need to keep track of
|
||||||
// the location of calls/returns for BOLT address translation maps
|
// the location of calls/returns for BOLT address translation maps
|
||||||
if (LabelsForOffsets && BC.MIB->hasAnnotation(Instr, "Offset")) {
|
if (!EmitCodeOnly && LabelsForOffsets &&
|
||||||
|
BC.MIB->hasAnnotation(Instr, "Offset")) {
|
||||||
|
|
||||||
MCSymbol *LocSym = BC.Ctx->createTempSymbol(/*CanBeUnnamed=*/true);
|
MCSymbol *LocSym = BC.Ctx->createTempSymbol(/*CanBeUnnamed=*/true);
|
||||||
Streamer.EmitLabel(LocSym);
|
Streamer.EmitLabel(LocSym);
|
||||||
BC.MIB->addAnnotation(Instr, "LocSym",
|
BC.MIB->addAnnotation(Instr, "LocSym",
|
||||||
|
@ -2708,11 +2710,11 @@ void BinaryFunction::emitBody(MCStreamer &Streamer, bool EmitColdPart,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Emit SDT labels
|
// Emit SDT labels
|
||||||
if (BC.MIB->hasAnnotation(Instr, "SDTMarker")) {
|
if (!EmitCodeOnly && BC.MIB->hasAnnotation(Instr, "SDTMarker")) {
|
||||||
auto OriginalAddress =
|
auto OriginalAddress =
|
||||||
BC.MIB->tryGetAnnotationAs<uint64_t>(Instr, "SDTMarker").get();
|
BC.MIB->tryGetAnnotationAs<uint64_t>(Instr, "SDTMarker").get();
|
||||||
auto *SDTLabel = BC.SDTMarkers[OriginalAddress].Label;
|
auto *SDTLabel = BC.SDTMarkers[OriginalAddress].Label;
|
||||||
|
|
||||||
// A given symbol should only be emitted as a label once
|
// A given symbol should only be emitted as a label once
|
||||||
if (SDTLabel->isUndefined())
|
if (SDTLabel->isUndefined())
|
||||||
Streamer.EmitLabel(SDTLabel);
|
Streamer.EmitLabel(SDTLabel);
|
||||||
|
@ -3263,9 +3265,13 @@ void BinaryFunction::fixBranches() {
|
||||||
const auto *FSuccessor = BB->getConditionalSuccessor(false);
|
const auto *FSuccessor = BB->getConditionalSuccessor(false);
|
||||||
if (NextBB && NextBB == TSuccessor) {
|
if (NextBB && NextBB == TSuccessor) {
|
||||||
std::swap(TSuccessor, FSuccessor);
|
std::swap(TSuccessor, FSuccessor);
|
||||||
MIB->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(), Ctx);
|
{
|
||||||
|
std::unique_lock<std::shared_timed_mutex> Lock(BC.CtxMutex);
|
||||||
|
MIB->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(), Ctx);
|
||||||
|
}
|
||||||
BB->swapConditionalSuccessors();
|
BB->swapConditionalSuccessors();
|
||||||
} else {
|
} else {
|
||||||
|
std::unique_lock<std::shared_timed_mutex> Lock(BC.CtxMutex);
|
||||||
MIB->replaceBranchTarget(*CondBranch, TSuccessor->getLabel(), Ctx);
|
MIB->replaceBranchTarget(*CondBranch, TSuccessor->getLabel(), Ctx);
|
||||||
}
|
}
|
||||||
if (TSuccessor == FSuccessor) {
|
if (TSuccessor == FSuccessor) {
|
||||||
|
@ -3279,7 +3285,11 @@ void BinaryFunction::fixBranches() {
|
||||||
TSuccessor->isCold() != FSuccessor->isCold() &&
|
TSuccessor->isCold() != FSuccessor->isCold() &&
|
||||||
BB->isCold() != TSuccessor->isCold()) {
|
BB->isCold() != TSuccessor->isCold()) {
|
||||||
std::swap(TSuccessor, FSuccessor);
|
std::swap(TSuccessor, FSuccessor);
|
||||||
MIB->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(), Ctx);
|
{
|
||||||
|
std::unique_lock<std::shared_timed_mutex> Lock(BC.CtxMutex);
|
||||||
|
MIB->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(),
|
||||||
|
Ctx);
|
||||||
|
}
|
||||||
BB->swapConditionalSuccessors();
|
BB->swapConditionalSuccessors();
|
||||||
}
|
}
|
||||||
BB->addBranchInstruction(FSuccessor);
|
BB->addBranchInstruction(FSuccessor);
|
||||||
|
@ -3849,13 +3859,13 @@ SMLoc BinaryFunction::emitLineInfo(SMLoc NewLoc, SMLoc PrevLoc,
|
||||||
// Always emit is_stmt at the beginning of function fragment.
|
// Always emit is_stmt at the beginning of function fragment.
|
||||||
if (FirstInstr)
|
if (FirstInstr)
|
||||||
Flags |= DWARF2_FLAG_IS_STMT;
|
Flags |= DWARF2_FLAG_IS_STMT;
|
||||||
|
|
||||||
BC.Ctx->setCurrentDwarfLoc(
|
BC.Ctx->setCurrentDwarfLoc(
|
||||||
CurrentFilenum,
|
CurrentFilenum,
|
||||||
CurrentRow.Line,
|
CurrentRow.Line,
|
||||||
CurrentRow.Column,
|
CurrentRow.Column,
|
||||||
Flags,
|
Flags,
|
||||||
CurrentRow.Isa,
|
CurrentRow.Isa,
|
||||||
CurrentRow.Discriminator);
|
CurrentRow.Discriminator);
|
||||||
BC.Ctx->setDwarfCompileUnitID(FunctionUnitIndex);
|
BC.Ctx->setDwarfCompileUnitID(FunctionUnitIndex);
|
||||||
|
|
||||||
|
|
|
@ -20,13 +20,13 @@
|
||||||
namespace opts {
|
namespace opts {
|
||||||
extern cl::OptionCategory BoltCategory;
|
extern cl::OptionCategory BoltCategory;
|
||||||
|
|
||||||
cl::opt<unsigned>
|
cl::opt<unsigned>
|
||||||
ThreadCount("thread-count",
|
ThreadCount("thread-count",
|
||||||
cl::desc("number of threads"),
|
cl::desc("number of threads"),
|
||||||
cl::init(hardware_concurrency()),
|
cl::init(hardware_concurrency()),
|
||||||
cl::cat(BoltCategory));
|
cl::cat(BoltCategory));
|
||||||
|
|
||||||
cl::opt<bool>
|
cl::opt<bool>
|
||||||
NoThreads("no-threads",
|
NoThreads("no-threads",
|
||||||
cl::desc("disable multithreading"),
|
cl::desc("disable multithreading"),
|
||||||
cl::init(false),
|
cl::init(false),
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
#include "ParallelUtilities.h"
|
#include "ParallelUtilities.h"
|
||||||
#include "Passes/ReorderAlgorithm.h"
|
#include "Passes/ReorderAlgorithm.h"
|
||||||
#include "llvm/Support/Options.h"
|
#include "llvm/Support/Options.h"
|
||||||
|
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
@ -335,31 +336,32 @@ void ReorderBasicBlocks::runOnFunctions(BinaryContext &BC) {
|
||||||
return;
|
return;
|
||||||
|
|
||||||
IsAArch64 = BC.isAArch64();
|
IsAArch64 = BC.isAArch64();
|
||||||
|
std::atomic<uint64_t> ModifiedFuncCount{0};
|
||||||
|
|
||||||
uint64_t ModifiedFuncCount = 0;
|
ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
|
||||||
for (auto &It : BC.getBinaryFunctions()) {
|
|
||||||
auto &Function = It.second;
|
|
||||||
|
|
||||||
if (!shouldOptimize(Function))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
const bool ShouldSplit =
|
const bool ShouldSplit =
|
||||||
(opts::SplitFunctions == BinaryFunction::ST_ALL) ||
|
(opts::SplitFunctions == BinaryFunction::ST_ALL) ||
|
||||||
(opts::SplitFunctions == BinaryFunction::ST_EH &&
|
(opts::SplitFunctions == BinaryFunction::ST_EH && BF.hasEHRanges()) ||
|
||||||
Function.hasEHRanges()) ||
|
BF.shouldSplit();
|
||||||
Function.shouldSplit();
|
modifyFunctionLayout(BF, opts::ReorderBlocks, opts::MinBranchClusters,
|
||||||
modifyFunctionLayout(Function, opts::ReorderBlocks, opts::MinBranchClusters,
|
|
||||||
ShouldSplit);
|
ShouldSplit);
|
||||||
|
if (BF.hasLayoutChanged()) {
|
||||||
if (Function.hasLayoutChanged()) {
|
|
||||||
++ModifiedFuncCount;
|
++ModifiedFuncCount;
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
|
|
||||||
|
ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
|
||||||
|
return !shouldOptimize(BF);
|
||||||
|
};
|
||||||
|
|
||||||
|
ParallelUtilities::runOnEachFunction(
|
||||||
|
BC, ParallelUtilities::SchedulingPolicy::SP_LINEAR, WorkFun, SkipFunc,
|
||||||
|
"ReorderBasicBlocks");
|
||||||
|
|
||||||
outs() << "BOLT-INFO: basic block reordering modified layout of "
|
outs() << "BOLT-INFO: basic block reordering modified layout of "
|
||||||
<< format("%zu (%.2lf%%) functions\n",
|
<< format("%zu (%.2lf%%) functions\n", ModifiedFuncCount.load(),
|
||||||
ModifiedFuncCount,
|
100.0 * ModifiedFuncCount.load() /
|
||||||
100.0 * ModifiedFuncCount / BC.getBinaryFunctions().size());
|
BC.getBinaryFunctions().size());
|
||||||
|
|
||||||
if (opts::PrintFuncStat > 0) {
|
if (opts::PrintFuncStat > 0) {
|
||||||
raw_ostream &OS = outs();
|
raw_ostream &OS = outs();
|
||||||
|
@ -373,8 +375,8 @@ void ReorderBasicBlocks::runOnFunctions(BinaryContext &BC) {
|
||||||
|
|
||||||
OS << "\nBOLT-INFO: Printing Function Statistics:\n\n";
|
OS << "\nBOLT-INFO: Printing Function Statistics:\n\n";
|
||||||
OS << " There are " << BFs.size() << " functions in total. \n";
|
OS << " There are " << BFs.size() << " functions in total. \n";
|
||||||
OS << " Number of functions being modified: " << ModifiedFuncCount
|
OS << " Number of functions being modified: "
|
||||||
<< "\n";
|
<< ModifiedFuncCount.load() << "\n";
|
||||||
OS << " User asks for detailed information on top "
|
OS << " User asks for detailed information on top "
|
||||||
<< opts::PrintFuncStat << " functions. (Ranked by function score)"
|
<< opts::PrintFuncStat << " functions. (Ranked by function score)"
|
||||||
<< "\n\n";
|
<< "\n\n";
|
||||||
|
|
|
@ -23,6 +23,7 @@ using EdgeList = std::vector<std::pair<BinaryBasicBlock *, uint64_t>>;
|
||||||
namespace opts {
|
namespace opts {
|
||||||
|
|
||||||
extern cl::OptionCategory BoltOptCategory;
|
extern cl::OptionCategory BoltOptCategory;
|
||||||
|
extern cl::opt<bool> NoThreads;
|
||||||
|
|
||||||
cl::opt<unsigned>
|
cl::opt<unsigned>
|
||||||
ClusterSplitThreshold("cluster-split-threshold",
|
ClusterSplitThreshold("cluster-split-threshold",
|
||||||
|
@ -288,6 +289,12 @@ private:
|
||||||
ExecutionCounts[BB->getLayoutIndex()] = EC;
|
ExecutionCounts[BB->getLayoutIndex()] = EC;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create a separate MCCodeEmitter to allow lock-free execution
|
||||||
|
BinaryContext::IndependentCodeEmitter Emitter;
|
||||||
|
if (!opts::NoThreads) {
|
||||||
|
Emitter = BF.getBinaryContext().createIndependentMCCodeEmitter();
|
||||||
|
}
|
||||||
|
|
||||||
// Initialize clusters
|
// Initialize clusters
|
||||||
Clusters.reserve(BF.layout_size());
|
Clusters.reserve(BF.layout_size());
|
||||||
AllClusters.reserve(BF.layout_size());
|
AllClusters.reserve(BF.layout_size());
|
||||||
|
@ -295,7 +302,8 @@ private:
|
||||||
Size.reserve(BF.layout_size());
|
Size.reserve(BF.layout_size());
|
||||||
for (auto BB : BF.layout()) {
|
for (auto BB : BF.layout()) {
|
||||||
size_t Index = BB->getLayoutIndex();
|
size_t Index = BB->getLayoutIndex();
|
||||||
Size.push_back(std::max<uint64_t>(BB->estimateSize(), 1));
|
Size.push_back(
|
||||||
|
std::max<uint64_t>(BB->estimateSize(Emitter.MCE.get()), 1));
|
||||||
AllClusters.emplace_back(BB, ExecutionCounts[Index], Size[Index]);
|
AllClusters.emplace_back(BB, ExecutionCounts[Index], Size[Index]);
|
||||||
Clusters.push_back(&AllClusters[Index]);
|
Clusters.push_back(&AllClusters[Index]);
|
||||||
CurCluster.push_back(&AllClusters[Index]);
|
CurCluster.push_back(&AllClusters[Index]);
|
||||||
|
|
|
@ -27,6 +27,7 @@ using namespace bolt;
|
||||||
namespace opts {
|
namespace opts {
|
||||||
|
|
||||||
extern cl::OptionCategory BoltOptCategory;
|
extern cl::OptionCategory BoltOptCategory;
|
||||||
|
extern cl::opt<bool> NoThreads;
|
||||||
|
|
||||||
static cl::opt<bool>
|
static cl::opt<bool>
|
||||||
PrintClusters("print-clusters",
|
PrintClusters("print-clusters",
|
||||||
|
@ -65,7 +66,13 @@ struct HashPair {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ClusterAlgorithm::computeClusterAverageFrequency() {
|
void ClusterAlgorithm::computeClusterAverageFrequency(const BinaryContext &BC) {
|
||||||
|
// Create a separate MCCodeEmitter to allow lock-free execution
|
||||||
|
BinaryContext::IndependentCodeEmitter Emitter;
|
||||||
|
if (!opts::NoThreads) {
|
||||||
|
Emitter = BC.createIndependentMCCodeEmitter();
|
||||||
|
}
|
||||||
|
|
||||||
AvgFreq.resize(Clusters.size(), 0.0);
|
AvgFreq.resize(Clusters.size(), 0.0);
|
||||||
for (uint32_t I = 0, E = Clusters.size(); I < E; ++I) {
|
for (uint32_t I = 0, E = Clusters.size(); I < E; ++I) {
|
||||||
double Freq = 0.0;
|
double Freq = 0.0;
|
||||||
|
@ -75,7 +82,7 @@ void ClusterAlgorithm::computeClusterAverageFrequency() {
|
||||||
Freq += BB->getExecutionCount();
|
Freq += BB->getExecutionCount();
|
||||||
// Estimate the size of a block in bytes at run time
|
// Estimate the size of a block in bytes at run time
|
||||||
// NOTE: This might be inaccurate
|
// NOTE: This might be inaccurate
|
||||||
ClusterSize += BB->estimateSize();
|
ClusterSize += BB->estimateSize(Emitter.MCE.get());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
AvgFreq[I] = ClusterSize == 0 ? 0 : Freq / ClusterSize;
|
AvgFreq[I] = ClusterSize == 0 ? 0 : Freq / ClusterSize;
|
||||||
|
@ -525,7 +532,7 @@ void OptimizeBranchReorderAlgorithm::reorderBasicBlocks(
|
||||||
auto &ClusterEdges = CAlgo->ClusterEdges;
|
auto &ClusterEdges = CAlgo->ClusterEdges;
|
||||||
|
|
||||||
// Compute clusters' average frequencies.
|
// Compute clusters' average frequencies.
|
||||||
CAlgo->computeClusterAverageFrequency();
|
CAlgo->computeClusterAverageFrequency(BF.getBinaryContext());
|
||||||
std::vector<double> &AvgFreq = CAlgo->AvgFreq;
|
std::vector<double> &AvgFreq = CAlgo->AvgFreq;
|
||||||
|
|
||||||
if (opts::PrintClusters)
|
if (opts::PrintClusters)
|
||||||
|
@ -621,13 +628,13 @@ void OptimizeCacheReorderAlgorithm::reorderBasicBlocks(
|
||||||
const BinaryFunction &BF, BasicBlockOrder &Order) const {
|
const BinaryFunction &BF, BasicBlockOrder &Order) const {
|
||||||
if (BF.layout_empty())
|
if (BF.layout_empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Cluster basic blocks.
|
// Cluster basic blocks.
|
||||||
CAlgo->clusterBasicBlocks(BF);
|
CAlgo->clusterBasicBlocks(BF);
|
||||||
std::vector<ClusterAlgorithm::ClusterTy> &Clusters = CAlgo->Clusters;
|
std::vector<ClusterAlgorithm::ClusterTy> &Clusters = CAlgo->Clusters;
|
||||||
|
|
||||||
// Compute clusters' average frequencies.
|
// Compute clusters' average frequencies.
|
||||||
CAlgo->computeClusterAverageFrequency();
|
CAlgo->computeClusterAverageFrequency(BF.getBinaryContext());
|
||||||
std::vector<double> &AvgFreq = CAlgo->AvgFreq;
|
std::vector<double> &AvgFreq = CAlgo->AvgFreq;
|
||||||
|
|
||||||
if (opts::PrintClusters)
|
if (opts::PrintClusters)
|
||||||
|
|
|
@ -53,7 +53,7 @@ public:
|
||||||
/// the sum of average frequencies of its blocks (execution count / # instrs).
|
/// the sum of average frequencies of its blocks (execution count / # instrs).
|
||||||
/// The average frequencies are stored in the AvgFreq vector, indexed by the
|
/// The average frequencies are stored in the AvgFreq vector, indexed by the
|
||||||
/// cluster indices in the Clusters vector.
|
/// cluster indices in the Clusters vector.
|
||||||
void computeClusterAverageFrequency();
|
void computeClusterAverageFrequency(const BinaryContext &BC);
|
||||||
|
|
||||||
/// Clear clusters and related info.
|
/// Clear clusters and related info.
|
||||||
virtual void reset();
|
virtual void reset();
|
||||||
|
|
Loading…
Reference in New Issue