[BOLT] Align basic blocks based on execution count

Summary:
The default is not changing, i.e. we are not aligning code within a
function by default.

New meaning of options for aligning basic blocks:

  -align-blocks
      triggers basic block alignment based on profile

  -preserve-blocks-alignment
      tries to preserve basic block alignment seen on input

Tuning options for "-align-blocks":
  -align-blocks-min-size=<uint>
      blocks smaller than the specified size will not be aligned

  -align-blocks-threshold=<uint>
      align only blocks whose execution frequency is larger than the
      specified percentage of the containing function's execution
      frequency. E.g. 1000 means aligning blocks that are 10 times more
      frequently executed than the containing function.

(cherry picked from FBD7921980)
This commit is contained in:
Maksim Panchenko 2017-11-07 15:42:28 -08:00
parent 9c6f965616
commit bdf21f7617
4 changed files with 141 additions and 16 deletions

View File

@ -80,7 +80,10 @@ private:
std::pair<uint32_t, uint32_t> InputRange{INVALID_OFFSET, INVALID_OFFSET};
/// Alignment requirements for the block.
uint64_t Alignment{1};
uint32_t Alignment{1};
/// Maximum number of bytes to use for alignment of the block.
uint32_t AlignmentMaxBytes{0};
/// Number of times this basic block was executed.
uint64_t ExecutionCount{COUNT_NO_PROFILE};
@ -499,15 +502,25 @@ public:
}
/// Set minimum alignment for the basic block.
void setAlignment(uint64_t Align) {
void setAlignment(uint32_t Align) {
Alignment = Align;
}
/// Return required alignment for the block.
uint64_t getAlignment() const {
uint32_t getAlignment() const {
return Alignment;
}
/// Set the maximum number of bytes to use for the block alignment.
/// The stored value is passed, together with the block alignment, to
/// Streamer.EmitCodeAlignment() when the block is emitted. Defaults to 0.
void setAlignmentMaxBytes(uint32_t Value) {
AlignmentMaxBytes = Value;
}
/// Return the maximum number of bytes to use for the block alignment,
/// as previously stored by setAlignmentMaxBytes() (0 if never set).
uint32_t getAlignmentMaxBytes() const {
return AlignmentMaxBytes;
}
/// Adds block to successor list, and also updates predecessor list for
/// successor block.
/// Set branch info for this path.

View File

@ -55,9 +55,10 @@ extern bool shouldProcess(const BinaryFunction &);
extern cl::opt<bool> UpdateDebugSections;
extern cl::opt<unsigned> Verbosity;
static cl::opt<bool>
cl::opt<bool>
AlignBlocks("align-blocks",
cl::desc("try to align BBs inserting nops"),
cl::desc("align basic blocks"),
cl::init(false),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
@ -74,6 +75,13 @@ AlignMacroOpFusion("align-macro-fusion",
cl::ZeroOrMore,
cl::cat(BoltRelocCategory));
// -preserve-blocks-alignment: when set, buildCFG() derives a block's alignment
// from the input if the instruction preceding the block is a nop, and
// emitBody() emits the recorded alignment. Off by default. Not declared
// static, so other translation units can reference it via an extern
// declaration.
cl::opt<bool>
PreserveBlocksAlignment("preserve-blocks-alignment",
cl::desc("try to preserve basic block alignment"),
cl::init(false),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
static cl::opt<bool>
DotToolTipCode("dot-tooltip-code",
cl::desc("add basic block instructions as tool tips on nodes"),
@ -1603,7 +1611,7 @@ bool BinaryFunction::buildCFG() {
// Always create new BB at branch destination.
PrevBB = InsertBB;
InsertBB = addBasicBlock(LI->first, LI->second,
/* DeriveAlignment = */ IsLastInstrNop);
opts::PreserveBlocksAlignment && IsLastInstrNop);
if (hasEntryPointAtOffset(Offset))
InsertBB->setEntryPoint();
if (PrevBB)
@ -1631,7 +1639,8 @@ bool BinaryFunction::buildCFG() {
} else {
InsertBB = addBasicBlock(Offset,
BC.Ctx->createTempSymbol("FT", true),
/* DeriveAlignment = */ IsLastInstrNop);
opts::PreserveBlocksAlignment &&
IsLastInstrNop);
updateOffset(LastInstrOffset);
}
}
@ -2195,8 +2204,11 @@ void BinaryFunction::emitBody(MCStreamer &Streamer, bool EmitColdPart) {
if (EmitColdPart != BB->isCold())
continue;
if (opts::AlignBlocks && BB->getAlignment() > 1)
Streamer.EmitCodeAlignment(BB->getAlignment());
if ((opts::AlignBlocks || opts::PreserveBlocksAlignment)
&& BB->getAlignment() > 1) {
Streamer.EmitCodeAlignment(BB->getAlignment(),
BB->getAlignmentMaxBytes());
}
Streamer.EmitLabel(BB->getLabel());
// Check if special alignment for macro-fusion is needed.

View File

@ -11,16 +11,34 @@
#include "Aligner.h"
#define DEBUG_TYPE "bolt-aligner"
using namespace llvm;
namespace opts {
extern cl::OptionCategory BoltOptCategory;
cl::opt<bool>
UseCompactAligner("use-compact-aligner",
cl::desc("Use compact approach for aligning functions"),
cl::init(false),
extern cl::opt<bool> AlignBlocks;
extern cl::opt<bool> PreserveBlocksAlignment;
// -align-blocks-min-size: AlignerPass::alignBlocks() skips blocks whose code
// size (as reported by BinaryContext::computeCodeSize()) is smaller than this
// value. The default of 0 disables the size check. Marked cl::Hidden.
cl::opt<unsigned>
AlignBlocksMinSize("align-blocks-min-size",
cl::desc("minimal size of the basic block that should be aligned"),
cl::init(0),
cl::ZeroOrMore,
cl::Hidden,
cl::cat(BoltOptCategory));
// -align-blocks-threshold: percentage of the containing function's execution
// count that a block's execution count must exceed for the block to be
// considered by AlignerPass::alignBlocks(). Blocks with
// Count <= FuncCount * Threshold / 100 are skipped. Defaults to 800 (i.e. the
// block must be more than 8 times hotter than the function). Marked cl::Hidden.
cl::opt<unsigned>
AlignBlocksThreshold("align-blocks-threshold",
cl::desc("align only blocks with frequency larger than containing function "
"execution frequency specified in percent. E.g. 1000 means aligning "
"blocks that are 10 times more frequently executed than the "
"containing function."),
cl::init(800),
cl::ZeroOrMore,
cl::Hidden,
cl::cat(BoltOptCategory));
cl::opt<unsigned>
@ -37,6 +55,20 @@ AlignFunctionsMaxBytes("align-functions-max-bytes",
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
// -block-alignment: alignment boundary that AlignerPass::alignBlocks() assigns
// to the blocks it selects. It also sizes the per-pass alignment histogram and
// caps the padding at (BlockAlignment - 1) bytes. Defaults to 16.
cl::opt<unsigned>
BlockAlignment("block-alignment",
cl::desc("boundary to use for alignment of basic blocks"),
cl::init(16),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
// -use-compact-aligner: when set, AlignerPass::runOnFunctions() aligns each
// function with alignCompact() instead of alignMaxBytes(). Off by default.
cl::opt<bool>
UseCompactAligner("use-compact-aligner",
cl::desc("Use compact approach for aligning functions"),
cl::init(false),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
} // end namespace opts
namespace llvm {
@ -56,9 +88,11 @@ void alignMaxBytes(BinaryFunction &Function) {
// the function by not more than the minimum over
// -- the size of the function
// -- the specified number of bytes
void alignCompact(BinaryContext &BC, BinaryFunction &Function) {
void alignCompact(BinaryFunction &Function) {
const auto &BC = Function.getBinaryContext();
size_t HotSize = 0;
size_t ColdSize = 0;
for (const auto *BB : Function.layout()) {
if (BB->isCold())
ColdSize += BC.computeCodeSize(BB->begin(), BB->end());
@ -80,19 +114,74 @@ void alignCompact(BinaryContext &BC, BinaryFunction &Function) {
} // end anonymous namespace
/// Assign alignment to the basic blocks of \p Function based on profile data.
///
/// A block is aligned to opts::BlockAlignment when all of the following hold:
///   - its execution count exceeds opts::AlignBlocksThreshold percent of the
///     function's execution count (the latter clamped to at least 1);
///   - its execution count is at least twice the count of the fall-through
///     edge from the preceding block in the layout, if there is such an edge;
///   - its code size is not below opts::AlignBlocksMinSize, when that option
///     is non-zero.
/// The padding for an aligned block is capped at
/// min(BlockAlignment - 1, block size) bytes.
///
/// Functions without a valid profile and non-simple functions are left
/// untouched. Updates AlignHistogram and AlignedBlocksCount; AlignHistogram
/// is expected to already hold opts::BlockAlignment entries.
void AlignerPass::alignBlocks(BinaryFunction &Function) {
  if (!Function.hasValidProfile() || !Function.isSimple())
    return;

  // A zero boundary cannot be aligned to, and (BlockAlignment - 1) would wrap
  // around and index past the end of the (empty) histogram below.
  if (opts::BlockAlignment == 0)
    return;

  const auto &BC = Function.getBinaryContext();

  // Explicit template arguments keep this portable: a `1UL` literal only
  // matches uint64_t on targets where unsigned long is 64 bits wide and is
  // the type uint64_t is defined as.
  const uint64_t FuncCount =
      std::max<uint64_t>(1, Function.getKnownExecutionCount());

  BinaryBasicBlock *PrevBB{nullptr};
  for (auto *BB : Function.layout()) {
    const auto Count = BB->getKnownExecutionCount();

    // Not hot enough relative to the function as a whole.
    if (Count <= FuncCount * opts::AlignBlocksThreshold / 100) {
      PrevBB = BB;
      continue;
    }

    // Count of the fall-through edge entering this block, if any.
    uint64_t FTCount = 0;
    if (PrevBB && PrevBB->getFallthrough() == BB)
      FTCount = PrevBB->getBranchInfo(*BB).Count;
    PrevBB = BB;

    // Skip blocks that are reached mostly by falling through: the padding
    // would sit on the fall-through path.
    if (Count < FTCount * 2)
      continue;

    const auto BlockSize = BC.computeCodeSize(BB->begin(), BB->end());
    if (opts::AlignBlocksMinSize && BlockSize < opts::AlignBlocksMinSize)
      continue;

    // Never spend more padding bytes than the block itself occupies.
    const uint64_t BytesToUse =
        std::min<uint64_t>(opts::BlockAlignment - 1, BlockSize);

    BB->setAlignment(opts::BlockAlignment);
    BB->setAlignmentMaxBytes(BytesToUse);

    // Update stats.
    AlignHistogram[BytesToUse]++;
    AlignedBlocksCount += BB->getKnownExecutionCount();
  }
}
void AlignerPass::runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &LargeFunctions) {
if (!BC.HasRelocations)
return;
AlignHistogram.resize(opts::BlockAlignment);
for (auto &It : BFs) {
auto &Function = It.second;
if (opts::UseCompactAligner)
alignCompact(BC, Function);
alignCompact(Function);
else
alignMaxBytes(Function);
if (opts::AlignBlocks && !opts::PreserveBlocksAlignment)
alignBlocks(Function);
}
DEBUG(
dbgs() << "BOLT-DEBUG: max bytes per basic block alignment distribution:\n";
for (unsigned I = 1; I < AlignHistogram.size(); ++I) {
dbgs() << " " << I << " : " << AlignHistogram[I] << '\n';
}
dbgs() << "BOLT-DEBUG: total execution count of aligned blocks: "
<< AlignedBlocksCount << '\n';
);
}
} // end namespace bolt

View File

@ -18,7 +18,18 @@ namespace llvm {
namespace bolt {
class AlignerPass : public BinaryFunctionPass {
public:
private:
/// Stats for usage of max bytes for basic block alignment.
std::vector<uint32_t> AlignHistogram;
/// Stats: execution count of blocks that were aligned.
uint64_t AlignedBlocksCount{0};
/// Assign alignment to basic blocks based on profile.
void alignBlocks(BinaryFunction &Function);
public:
explicit AlignerPass() : BinaryFunctionPass(false) {}
const char *getName() const override {