forked from OSchip/llvm-project
[BOLT] Align basic blocks based on execution count
Summary: The default is unchanged, i.e. we do not align code within a function by default. New meaning of the options for aligning basic blocks: "-align-blocks" triggers basic block alignment based on profile; "-preserve-blocks-alignment" tries to preserve the basic block alignment seen on input. Tuning options for "-align-blocks": "-align-blocks-min-size=<uint>" - blocks smaller than the specified size are not aligned; "-align-blocks-threshold=<uint>" - align only blocks whose execution frequency is larger than the containing function's execution frequency scaled by the given percentage, e.g. 1000 means aligning only blocks that are executed more than 10 times as frequently as the containing function. (cherry picked from FBD7921980)
This commit is contained in:
parent
9c6f965616
commit
bdf21f7617
|
@ -80,7 +80,10 @@ private:
|
|||
std::pair<uint32_t, uint32_t> InputRange{INVALID_OFFSET, INVALID_OFFSET};
|
||||
|
||||
/// Alignment requirements for the block.
|
||||
uint64_t Alignment{1};
|
||||
uint32_t Alignment{1};
|
||||
|
||||
/// Maximum number of bytes to use for alignment of the block.
|
||||
uint32_t AlignmentMaxBytes{0};
|
||||
|
||||
/// Number of times this basic block was executed.
|
||||
uint64_t ExecutionCount{COUNT_NO_PROFILE};
|
||||
|
@ -499,15 +502,25 @@ public:
|
|||
}
|
||||
|
||||
/// Set minimum alignment for the basic block.
|
||||
void setAlignment(uint64_t Align) {
|
||||
void setAlignment(uint32_t Align) {
|
||||
Alignment = Align;
|
||||
}
|
||||
|
||||
/// Return required alignment for the block.
|
||||
uint64_t getAlignment() const {
|
||||
uint32_t getAlignment() const {
|
||||
return Alignment;
|
||||
}
|
||||
|
||||
/// Set the maximum number of bytes to use for the block alignment.
|
||||
void setAlignmentMaxBytes(uint32_t Value) {
|
||||
AlignmentMaxBytes = Value;
|
||||
}
|
||||
|
||||
/// Return the maximum number of bytes to use for the block alignment.
|
||||
uint32_t getAlignmentMaxBytes() const {
|
||||
return AlignmentMaxBytes;
|
||||
}
|
||||
|
||||
/// Adds block to successor list, and also updates predecessor list for
|
||||
/// successor block.
|
||||
/// Set branch info for this path.
|
||||
|
|
|
@ -55,9 +55,10 @@ extern bool shouldProcess(const BinaryFunction &);
|
|||
extern cl::opt<bool> UpdateDebugSections;
|
||||
extern cl::opt<unsigned> Verbosity;
|
||||
|
||||
static cl::opt<bool>
|
||||
cl::opt<bool>
|
||||
AlignBlocks("align-blocks",
|
||||
cl::desc("try to align BBs inserting nops"),
|
||||
cl::desc("align basic blocks"),
|
||||
cl::init(false),
|
||||
cl::ZeroOrMore,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
|
@ -74,6 +75,13 @@ AlignMacroOpFusion("align-macro-fusion",
|
|||
cl::ZeroOrMore,
|
||||
cl::cat(BoltRelocCategory));
|
||||
|
||||
cl::opt<bool>
|
||||
PreserveBlocksAlignment("preserve-blocks-alignment",
|
||||
cl::desc("try to preserve basic block alignment"),
|
||||
cl::init(false),
|
||||
cl::ZeroOrMore,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
static cl::opt<bool>
|
||||
DotToolTipCode("dot-tooltip-code",
|
||||
cl::desc("add basic block instructions as tool tips on nodes"),
|
||||
|
@ -1603,7 +1611,7 @@ bool BinaryFunction::buildCFG() {
|
|||
// Always create new BB at branch destination.
|
||||
PrevBB = InsertBB;
|
||||
InsertBB = addBasicBlock(LI->first, LI->second,
|
||||
/* DeriveAlignment = */ IsLastInstrNop);
|
||||
opts::PreserveBlocksAlignment && IsLastInstrNop);
|
||||
if (hasEntryPointAtOffset(Offset))
|
||||
InsertBB->setEntryPoint();
|
||||
if (PrevBB)
|
||||
|
@ -1631,7 +1639,8 @@ bool BinaryFunction::buildCFG() {
|
|||
} else {
|
||||
InsertBB = addBasicBlock(Offset,
|
||||
BC.Ctx->createTempSymbol("FT", true),
|
||||
/* DeriveAlignment = */ IsLastInstrNop);
|
||||
opts::PreserveBlocksAlignment &&
|
||||
IsLastInstrNop);
|
||||
updateOffset(LastInstrOffset);
|
||||
}
|
||||
}
|
||||
|
@ -2195,8 +2204,11 @@ void BinaryFunction::emitBody(MCStreamer &Streamer, bool EmitColdPart) {
|
|||
if (EmitColdPart != BB->isCold())
|
||||
continue;
|
||||
|
||||
if (opts::AlignBlocks && BB->getAlignment() > 1)
|
||||
Streamer.EmitCodeAlignment(BB->getAlignment());
|
||||
if ((opts::AlignBlocks || opts::PreserveBlocksAlignment)
|
||||
&& BB->getAlignment() > 1) {
|
||||
Streamer.EmitCodeAlignment(BB->getAlignment(),
|
||||
BB->getAlignmentMaxBytes());
|
||||
}
|
||||
Streamer.EmitLabel(BB->getLabel());
|
||||
|
||||
// Check if special alignment for macro-fusion is needed.
|
||||
|
|
|
@ -11,16 +11,34 @@
|
|||
|
||||
#include "Aligner.h"
|
||||
|
||||
#define DEBUG_TYPE "bolt-aligner"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace opts {
|
||||
|
||||
extern cl::OptionCategory BoltOptCategory;
|
||||
|
||||
cl::opt<bool>
|
||||
UseCompactAligner("use-compact-aligner",
|
||||
cl::desc("Use compact approach for aligning functions"),
|
||||
cl::init(false),
|
||||
extern cl::opt<bool> AlignBlocks;
|
||||
extern cl::opt<bool> PreserveBlocksAlignment;
|
||||
|
||||
cl::opt<unsigned>
|
||||
AlignBlocksMinSize("align-blocks-min-size",
|
||||
cl::desc("minimal size of the basic block that should be aligned"),
|
||||
cl::init(0),
|
||||
cl::ZeroOrMore,
|
||||
cl::Hidden,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
cl::opt<unsigned>
|
||||
AlignBlocksThreshold("align-blocks-threshold",
|
||||
cl::desc("align only blocks with frequency larger than containing function "
|
||||
"execution frequency specified in percent. E.g. 1000 means aligning "
|
||||
"blocks that are 10 times more frequently executed than the "
|
||||
"containing function."),
|
||||
cl::init(800),
|
||||
cl::ZeroOrMore,
|
||||
cl::Hidden,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
cl::opt<unsigned>
|
||||
|
@ -37,6 +55,20 @@ AlignFunctionsMaxBytes("align-functions-max-bytes",
|
|||
cl::ZeroOrMore,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
cl::opt<unsigned>
|
||||
BlockAlignment("block-alignment",
|
||||
cl::desc("boundary to use for alignment of basic blocks"),
|
||||
cl::init(16),
|
||||
cl::ZeroOrMore,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
cl::opt<bool>
|
||||
UseCompactAligner("use-compact-aligner",
|
||||
cl::desc("Use compact approach for aligning functions"),
|
||||
cl::init(false),
|
||||
cl::ZeroOrMore,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
} // end namespace opts
|
||||
|
||||
namespace llvm {
|
||||
|
@ -56,9 +88,11 @@ void alignMaxBytes(BinaryFunction &Function) {
|
|||
// the function by not more than the minimum over
|
||||
// -- the size of the function
|
||||
// -- the specified number of bytes
|
||||
void alignCompact(BinaryContext &BC, BinaryFunction &Function) {
|
||||
void alignCompact(BinaryFunction &Function) {
|
||||
const auto &BC = Function.getBinaryContext();
|
||||
size_t HotSize = 0;
|
||||
size_t ColdSize = 0;
|
||||
|
||||
for (const auto *BB : Function.layout()) {
|
||||
if (BB->isCold())
|
||||
ColdSize += BC.computeCodeSize(BB->begin(), BB->end());
|
||||
|
@ -80,19 +114,74 @@ void alignCompact(BinaryContext &BC, BinaryFunction &Function) {
|
|||
|
||||
} // end anonymous namespace
|
||||
|
||||
/// Assign alignment to the basic blocks of \p Function based on its profile.
///
/// Functions without a valid profile, or that are not simple, are left
/// untouched. Otherwise a block is aligned to opts::BlockAlignment when all
/// of the following hold:
///   - its execution count is larger than the function execution count
///     scaled by opts::AlignBlocksThreshold (a percentage);
///   - its execution count is at least twice the count of the fall-through
///     edge from the preceding block in the layout (presumably because the
///     padding would otherwise sit on a hot fall-through path);
///   - its size is not below opts::AlignBlocksMinSize, when that is non-zero.
///
/// The number of padding bytes allowed for an aligned block is capped at
/// min(opts::BlockAlignment - 1, size of the block).
void AlignerPass::alignBlocks(BinaryFunction &Function) {
  if (!Function.hasValidProfile() || !Function.isSimple())
    return;

  const auto &BC = Function.getBinaryContext();

  // Spell out uint64_t instead of mixing it with a `1UL` literal: the two
  // are different types wherever uint64_t is `unsigned long long`, which
  // makes std::max/std::min template argument deduction fail.
  const uint64_t FuncCount =
      std::max<uint64_t>(1, Function.getKnownExecutionCount());
  const uint64_t Boundary = opts::BlockAlignment;

  BinaryBasicBlock *PrevBB{nullptr};
  for (auto *BB : Function.layout()) {
    const uint64_t Count = BB->getKnownExecutionCount();

    // Not hot enough relative to the containing function.
    if (Count <= FuncCount * opts::AlignBlocksThreshold / 100) {
      PrevBB = BB;
      continue;
    }

    // Execution count of the fall-through edge entering this block, if any.
    uint64_t FTCount = 0;
    if (PrevBB && PrevBB->getFallthrough() == BB)
      FTCount = PrevBB->getBranchInfo(*BB).Count;
    PrevBB = BB;

    if (Count < FTCount * 2)
      continue;

    const uint64_t BlockSize = BC.computeCodeSize(BB->begin(), BB->end());
    if (opts::AlignBlocksMinSize && BlockSize < opts::AlignBlocksMinSize)
      continue;

    const uint64_t BytesToUse = std::min<uint64_t>(Boundary - 1, BlockSize);

    BB->setAlignment(opts::BlockAlignment);
    BB->setAlignmentMaxBytes(BytesToUse);

    // Update stats. The histogram is sized by the caller; never index past
    // its end (e.g. when -block-alignment=0 was requested).
    if (BytesToUse < AlignHistogram.size())
      AlignHistogram[BytesToUse]++;
    AlignedBlocksCount += Count;
  }
}
|
||||
|
||||
/// Assign function alignment, and optionally profile-based basic block
/// alignment, to every function in \p BFs.
///
/// Does nothing unless \p BC has relocations. For each function, either the
/// compact or the max-bytes function aligner is applied depending on
/// opts::UseCompactAligner. Basic blocks are aligned based on profile only
/// when opts::AlignBlocks is set and opts::PreserveBlocksAlignment is not.
/// Under DEBUG, the distribution of the max padding bytes and the total
/// execution count of the aligned blocks are printed.
void AlignerPass::runOnFunctions(BinaryContext &BC,
                                 std::map<uint64_t, BinaryFunction> &BFs,
                                 std::set<uint64_t> &LargeFunctions) {
  if (!BC.HasRelocations)
    return;

  // One bucket per possible max-bytes value: [0, BlockAlignment - 1].
  AlignHistogram.resize(opts::BlockAlignment);

  for (auto &It : BFs) {
    auto &Function = It.second;

    if (opts::UseCompactAligner)
      alignCompact(Function);
    else
      alignMaxBytes(Function);

    if (opts::AlignBlocks && !opts::PreserveBlocksAlignment)
      alignBlocks(Function);
  }

  DEBUG(
    dbgs() << "BOLT-DEBUG: max bytes per basic block alignment distribution:\n";
    for (unsigned I = 1; I < AlignHistogram.size(); ++I) {
      dbgs() << " " << I << " : " << AlignHistogram[I] << '\n';
    }
    dbgs() << "BOLT-DEBUG: total execution count of aligned blocks: "
           << AlignedBlocksCount << '\n';
  );
}
|
||||
|
||||
} // end namespace bolt
|
||||
|
|
|
@ -18,7 +18,18 @@ namespace llvm {
|
|||
namespace bolt {
|
||||
|
||||
class AlignerPass : public BinaryFunctionPass {
|
||||
public:
|
||||
private:
|
||||
|
||||
/// Stats for usage of max bytes for basic block alignment.
|
||||
std::vector<uint32_t> AlignHistogram;
|
||||
|
||||
/// Stats: execution count of blocks that were aligned.
|
||||
uint64_t AlignedBlocksCount{0};
|
||||
|
||||
/// Assign alignment to basic blocks based on profile.
|
||||
void alignBlocks(BinaryFunction &Function);
|
||||
|
||||
public:
|
||||
explicit AlignerPass() : BinaryFunctionPass(false) {}
|
||||
|
||||
const char *getName() const override {
|
||||
|
|
Loading…
Reference in New Issue