diff --git a/bolt/src/BinaryBasicBlock.h b/bolt/src/BinaryBasicBlock.h
index cf3b6dd27d63..946de333093e 100644
--- a/bolt/src/BinaryBasicBlock.h
+++ b/bolt/src/BinaryBasicBlock.h
@@ -80,7 +80,10 @@ private:
   std::pair InputRange{INVALID_OFFSET, INVALID_OFFSET};
 
   /// Alignment requirements for the block.
-  uint64_t Alignment{1};
+  uint32_t Alignment{1};
+
+  /// Maximum number of bytes to use for alignment of the block (0 by default).
+  uint32_t AlignmentMaxBytes{0};
 
   /// Number of times this basic block was executed.
   uint64_t ExecutionCount{COUNT_NO_PROFILE};
@@ -499,15 +502,25 @@ public:
   }
 
   /// Set minimum alignment for the basic block.
-  void setAlignment(uint64_t Align) {
+  void setAlignment(uint32_t Align) {
     Alignment = Align;
   }
 
   /// Return required alignment for the block.
-  uint64_t getAlignment() const {
+  uint32_t getAlignment() const {
     return Alignment;
   }
 
+  /// Set the maximum number of bytes to use for the block alignment.
+  void setAlignmentMaxBytes(uint32_t Value) {
+    AlignmentMaxBytes = Value;
+  }
+
+  /// Return the maximum number of bytes to use for the block alignment.
+  uint32_t getAlignmentMaxBytes() const {
+    return AlignmentMaxBytes;
+  }
+
   /// Adds block to successor list, and also updates predecessor list for
   /// successor block.
   /// Set branch info for this path.
diff --git a/bolt/src/BinaryFunction.cpp b/bolt/src/BinaryFunction.cpp
index a127b8e3aea2..dd8ef1eae0c6 100644
--- a/bolt/src/BinaryFunction.cpp
+++ b/bolt/src/BinaryFunction.cpp
@@ -55,9 +55,10 @@ extern bool shouldProcess(const BinaryFunction &);
 extern cl::opt UpdateDebugSections;
 extern cl::opt Verbosity;
 
-static cl::opt
+cl::opt
 AlignBlocks("align-blocks",
-  cl::desc("try to align BBs inserting nops"),
+  cl::desc("align basic blocks"),
+  cl::init(false),
   cl::ZeroOrMore,
   cl::cat(BoltOptCategory));
 
@@ -74,6 +75,13 @@ AlignMacroOpFusion("align-macro-fusion",
   cl::ZeroOrMore,
   cl::cat(BoltRelocCategory));
 
+cl::opt
+PreserveBlocksAlignment("preserve-blocks-alignment",
+  cl::desc("try to preserve basic block alignment"),
+  cl::init(false),
+  cl::ZeroOrMore,
+  cl::cat(BoltOptCategory));
+
 static cl::opt
 DotToolTipCode("dot-tooltip-code",
   cl::desc("add basic block instructions as tool tips on nodes"),
@@ -1603,7 +1611,7 @@ bool BinaryFunction::buildCFG() {
       // Always create new BB at branch destination.
       PrevBB = InsertBB;
       InsertBB = addBasicBlock(LI->first, LI->second,
-                               /* DeriveAlignment = */ IsLastInstrNop);
+                               opts::PreserveBlocksAlignment && IsLastInstrNop);
       if (hasEntryPointAtOffset(Offset))
         InsertBB->setEntryPoint();
       if (PrevBB)
@@ -1631,7 +1639,8 @@ bool BinaryFunction::buildCFG() {
         } else {
           InsertBB = addBasicBlock(Offset,
                                    BC.Ctx->createTempSymbol("FT", true),
-                                   /* DeriveAlignment = */ IsLastInstrNop);
+                                   opts::PreserveBlocksAlignment &&
+                                     IsLastInstrNop);
           updateOffset(LastInstrOffset);
         }
       }
@@ -2195,8 +2204,11 @@ void BinaryFunction::emitBody(MCStreamer &Streamer, bool EmitColdPart) {
     if (EmitColdPart != BB->isCold())
       continue;
 
-    if (opts::AlignBlocks && BB->getAlignment() > 1)
-      Streamer.EmitCodeAlignment(BB->getAlignment());
+    if ((opts::AlignBlocks || opts::PreserveBlocksAlignment)
+        && BB->getAlignment() > 1) {
+      Streamer.EmitCodeAlignment(BB->getAlignment(),
+                                 BB->getAlignmentMaxBytes());
+    }
     Streamer.EmitLabel(BB->getLabel());
 
     // Check if special alignment for macro-fusion is needed.
diff --git a/bolt/src/Passes/Aligner.cpp b/bolt/src/Passes/Aligner.cpp
index 0d56e9ffcfea..c4d4434f5b30 100644
--- a/bolt/src/Passes/Aligner.cpp
+++ b/bolt/src/Passes/Aligner.cpp
@@ -11,16 +11,34 @@
 
 #include "Aligner.h"
 
+#define DEBUG_TYPE "bolt-aligner"
+
 using namespace llvm;
 
 namespace opts {
+
 extern cl::OptionCategory BoltOptCategory;
 
-cl::opt
-UseCompactAligner("use-compact-aligner",
-  cl::desc("Use compact approach for aligning functions"),
-  cl::init(false),
+extern cl::opt AlignBlocks;
+extern cl::opt PreserveBlocksAlignment;
+
+cl::opt
+AlignBlocksMinSize("align-blocks-min-size",
+  cl::desc("minimal size of the basic block that should be aligned"),
+  cl::init(0),
   cl::ZeroOrMore,
+  cl::Hidden,
+  cl::cat(BoltOptCategory));
+
+cl::opt
+AlignBlocksThreshold("align-blocks-threshold",
+  cl::desc("align only blocks with frequency larger than containing function "
+           "execution frequency specified in percent. E.g. 1000 means aligning "
+           "blocks that are 10 times more frequently executed than the "
+           "containing function."),
+  cl::init(800),
+  cl::ZeroOrMore,
+  cl::Hidden,
   cl::cat(BoltOptCategory));
 
 cl::opt
@@ -37,6 +55,20 @@ AlignFunctionsMaxBytes("align-functions-max-bytes",
   cl::ZeroOrMore,
   cl::cat(BoltOptCategory));
 
+cl::opt
+BlockAlignment("block-alignment",
+  cl::desc("boundary to use for alignment of basic blocks"),
+  cl::init(16),
+  cl::ZeroOrMore,
+  cl::cat(BoltOptCategory));
+
+cl::opt
+UseCompactAligner("use-compact-aligner",
+  cl::desc("Use compact approach for aligning functions"),
+  cl::init(false),
+  cl::ZeroOrMore,
+  cl::cat(BoltOptCategory));
+
 } // end namespace opts
 
 namespace llvm {
@@ -56,9 +88,11 @@ void alignMaxBytes(BinaryFunction &Function) {
 // the fuction by not more than the minimum over
 // -- the size of the function
 // -- the specified number of bytes
-void alignCompact(BinaryContext &BC, BinaryFunction &Function) {
+void alignCompact(BinaryFunction &Function) {
+  const auto &BC = Function.getBinaryContext();
   size_t HotSize = 0;
   size_t ColdSize = 0;
+
   for (const auto *BB : Function.layout()) {
     if (BB->isCold())
       ColdSize += BC.computeCodeSize(BB->begin(), BB->end());
@@ -80,19 +114,100 @@ void alignCompact(BinaryContext &BC, BinaryFunction &Function) {
 
 } // end anonymous namespace
 
+/// Assign alignment to hot basic blocks of \p Function based on its profile.
+///
+/// A block is aligned to opts::BlockAlignment when its execution count exceeds
+/// opts::AlignBlocksThreshold percent of the function's execution count and is
+/// at least twice the count of the fall-through edge coming from the preceding
+/// block in the layout. Padding is limited to the smaller of
+/// opts::BlockAlignment - 1 and the size of the block.
+void AlignerPass::alignBlocks(BinaryFunction &Function) {
+  if (!Function.hasValidProfile() || !Function.isSimple())
+    return;
+
+  // An alignment of 0 or 1 is never emitted, and 0 would break the
+  // "BlockAlignment - 1" padding limit below and let the histogram index
+  // run past the end of AlignHistogram.
+  if (opts::BlockAlignment <= 1)
+    return;
+
+  const auto &BC = Function.getBinaryContext();
+
+  // Explicit template arguments keep std::max/std::min well-formed on
+  // platforms where uint64_t is not "unsigned long".
+  const uint64_t FuncCount =
+      std::max<uint64_t>(1, Function.getKnownExecutionCount());
+  const uint64_t MaxPadding = opts::BlockAlignment - 1;
+
+  BinaryBasicBlock *PrevBB{nullptr};
+  for (auto *BB : Function.layout()) {
+    const auto Count = BB->getKnownExecutionCount();
+
+    // Skip blocks that are not hot enough relative to the function.
+    if (Count <= FuncCount * opts::AlignBlocksThreshold / 100) {
+      PrevBB = BB;
+      continue;
+    }
+
+    // Execution count of the fall-through edge from the preceding block.
+    uint64_t FTCount = 0;
+    if (PrevBB && PrevBB->getFallthrough() == BB)
+      FTCount = PrevBB->getBranchInfo(*BB).Count;
+    PrevBB = BB;
+
+    // Skip blocks that are mostly entered by falling through.
+    if (Count < FTCount * 2)
+      continue;
+
+    const auto BlockSize = BC.computeCodeSize(BB->begin(), BB->end());
+    if (opts::AlignBlocksMinSize && BlockSize < opts::AlignBlocksMinSize)
+      continue;
+
+    const auto BytesToUse =
+        static_cast<uint32_t>(std::min<uint64_t>(MaxPadding, BlockSize));
+
+    // MCStreamer treats a limit of 0 as "no limit", so an empty block must
+    // not be handed an alignment with zero bytes to use.
+    if (BytesToUse == 0)
+      continue;
+
+    BB->setAlignment(opts::BlockAlignment);
+    BB->setAlignmentMaxBytes(BytesToUse);
+
+    // Update stats.
+    AlignHistogram[BytesToUse]++;
+    AlignedBlocksCount += Count;
+  }
+}
+
 void AlignerPass::runOnFunctions(BinaryContext &BC,
                                  std::map &BFs,
                                  std::set &LargeFunctions) {
   if (!BC.HasRelocations)
     return;
 
+  AlignHistogram.resize(opts::BlockAlignment);
+
   for (auto &It : BFs) {
     auto &Function = It.second;
+
     if (opts::UseCompactAligner)
-      alignCompact(BC, Function);
+      alignCompact(Function);
     else
       alignMaxBytes(Function);
+
+    if (opts::AlignBlocks && !opts::PreserveBlocksAlignment)
+      alignBlocks(Function);
   }
+
+  DEBUG(
+    dbgs() << "BOLT-DEBUG: max bytes per basic block alignment distribution:\n";
+    for (unsigned I = 1; I < AlignHistogram.size(); ++I) {
+      dbgs() << " " << I << " : " << AlignHistogram[I] << '\n';
+    }
+    dbgs() << "BOLT-DEBUG: total execution count of aligned blocks: "
+           << AlignedBlocksCount << '\n';
+  );
 }
 
 } // end namespace bolt
diff --git a/bolt/src/Passes/Aligner.h b/bolt/src/Passes/Aligner.h
index 3164a47a91c8..28e6f6d693b0 100644
--- a/bolt/src/Passes/Aligner.h
+++ b/bolt/src/Passes/Aligner.h
@@ -18,7 +18,18 @@ namespace llvm {
 namespace bolt {
 
 class AlignerPass : public BinaryFunctionPass {
- public:
+private:
+
+  /// Stats for usage of max bytes for basic block alignment.
+  std::vector AlignHistogram;
+
+  /// Stats: execution count of blocks that were aligned.
+  uint64_t AlignedBlocksCount{0};
+
+  /// Assign alignment to basic blocks based on profile.
+  void alignBlocks(BinaryFunction &Function);
+
+public:
   explicit AlignerPass() : BinaryFunctionPass(false) {}
 
   const char *getName() const override {