llvm-project/bolt/lib/Passes/Aligner.cpp

201 lines
6.0 KiB
C++

//===- bolt/Passes/Aligner.cpp - Pass for optimal code alignment ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AlignerPass class.
//
//===----------------------------------------------------------------------===//
#include "bolt/Passes/Aligner.h"
#include "bolt/Core/ParallelUtilities.h"
#define DEBUG_TYPE "bolt-aligner"
using namespace llvm;
// Command-line options that control the aligner pass.
namespace opts {
// Option category and flags declared here and defined in another translation
// unit.
extern cl::OptionCategory BoltOptCategory;
// Basic-block alignment in this pass runs only when AlignBlocks is set and
// PreserveBlocksAlignment is not set (see AlignerPass::runOnFunctions).
extern cl::opt<bool> AlignBlocks;
extern cl::opt<bool> PreserveBlocksAlignment;
// Blocks whose computed code size is below this value are not aligned.
// The default of 0 disables the size check (see AlignerPass::alignBlocks).
cl::opt<unsigned>
AlignBlocksMinSize("align-blocks-min-size",
cl::desc("minimal size of the basic block that should be aligned"),
cl::init(0),
cl::ZeroOrMore,
cl::Hidden,
cl::cat(BoltOptCategory));
// Hotness threshold in percent of the function execution count: a block is
// considered for alignment only if its execution count is strictly greater
// than FuncCount * threshold / 100.
cl::opt<unsigned>
AlignBlocksThreshold("align-blocks-threshold",
cl::desc("align only blocks with frequency larger than containing function "
"execution frequency specified in percent. E.g. 1000 means aligning "
"blocks that are 10 times more frequently executed than the "
"containing function."),
cl::init(800),
cl::ZeroOrMore,
cl::Hidden,
cl::cat(BoltOptCategory));
// Alignment boundary requested for every function processed by this pass.
cl::opt<unsigned>
AlignFunctions("align-functions",
cl::desc("align functions at a given value (relocation mode)"),
cl::init(64),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
// Upper bound on the number of bytes used to align a function; applied to
// both the regular and the cold max-alignment settings of the function.
cl::opt<unsigned>
AlignFunctionsMaxBytes("align-functions-max-bytes",
cl::desc("maximum number of bytes to use to align functions"),
cl::init(32),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
// Alignment boundary for basic blocks. Also used as the size of the debug
// histogram of per-block max alignment bytes.
cl::opt<unsigned>
BlockAlignment("block-alignment",
cl::desc("boundary to use for alignment of basic blocks"),
cl::init(16),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
// Selects alignCompact() (when true, the default) over alignMaxBytes() for
// function alignment.
cl::opt<bool>
UseCompactAligner("use-compact-aligner",
cl::desc("Use compact approach for aligning functions"),
cl::init(true),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
} // end namespace opts
namespace llvm {
namespace bolt {
namespace {
// Request alignment of the function at the boundary given by
// opts::AlignFunctions (typically, 64), offsetting the function by no more
// than opts::AlignFunctionsMaxBytes. The same byte limit is applied to the
// regular and to the cold max-alignment settings.
void alignMaxBytes(BinaryFunction &Function) {
  const unsigned Boundary = opts::AlignFunctions;
  const unsigned PaddingLimit = opts::AlignFunctionsMaxBytes;

  Function.setAlignment(Boundary);
  Function.setMaxAlignmentBytes(PaddingLimit);
  Function.setMaxColdAlignmentBytes(PaddingLimit);
}
// Request alignment of the function at the boundary given by
// opts::AlignFunctions (typically, 64), offsetting the function by no more
// than the minimum over
//   -- the code size of the corresponding (hot or cold) part of the function
//   -- the number of bytes given by opts::AlignFunctionsMaxBytes
// Code sizes are computed with the supplied Emitter. A part whose computed
// size is zero keeps whatever max-alignment value it already had.
void alignCompact(BinaryFunction &Function, const MCCodeEmitter *Emitter) {
  const BinaryContext &BC = Function.getBinaryContext();

  // Sum up the code size of hot and cold blocks separately.
  size_t HotBytes = 0;
  size_t ColdBytes = 0;
  for (const BinaryBasicBlock *BB : Function.layout()) {
    size_t &PartBytes = BB->isCold() ? ColdBytes : HotBytes;
    PartBytes += BC.computeCodeSize(BB->begin(), BB->end(), Emitter);
  }

  Function.setAlignment(opts::AlignFunctions);

  // The same option, align-functions-max-bytes, limits both the hot and the
  // cold part, as aligning cold functions typically does not affect
  // performance.
  const size_t PaddingLimit = opts::AlignFunctionsMaxBytes;
  if (HotBytes != 0)
    Function.setMaxAlignmentBytes(std::min(PaddingLimit, HotBytes));
  if (ColdBytes != 0)
    Function.setMaxColdAlignmentBytes(std::min(PaddingLimit, ColdBytes));
}
} // end anonymous namespace
// Mark hot basic blocks of Function for alignment at opts::BlockAlignment.
//
// Functions without a valid profile, or that are not simple, are left
// untouched. Within a function, a block is aligned only if all of the
// following hold:
//   -- its execution count is strictly greater than
//      FuncCount * opts::AlignBlocksThreshold / 100, where FuncCount is the
//      function execution count clamped to at least 1;
//   -- its execution count is at least twice the count of the fall-through
//      edge coming from the previous block in the layout (if there is one);
//   -- its code size, computed with Emitter, is not below
//      opts::AlignBlocksMinSize (a value of 0 disables this check).
// The max number of alignment bytes for a block is the smaller of
// opts::BlockAlignment - 1 and the block size.
void AlignerPass::alignBlocks(BinaryFunction &Function,
                              const MCCodeEmitter *Emitter) {
  if (!(Function.hasValidProfile() && Function.isSimple()))
    return;

  const BinaryContext &BC = Function.getBinaryContext();
  const uint64_t FuncCount =
      std::max<uint64_t>(1, Function.getKnownExecutionCount());
  // The hotness cut-off does not depend on the block, compute it once.
  const uint64_t MinCount = FuncCount * opts::AlignBlocksThreshold / 100;

  BinaryBasicBlock *PrevBB = nullptr;
  for (BinaryBasicBlock *BB : Function.layout()) {
    // Every iteration makes the current block the predecessor of the next
    // one, so record the old predecessor and advance right away.
    BinaryBasicBlock *const LayoutPred = PrevBB;
    PrevBB = BB;

    const uint64_t Count = BB->getKnownExecutionCount();
    if (Count <= MinCount)
      continue;

    // Count of the fall-through edge from the layout predecessor, if any.
    const uint64_t FTCount =
        (LayoutPred && LayoutPred->getFallthrough() == BB)
            ? LayoutPred->getBranchInfo(*BB).Count
            : 0;
    // NOTE(review): presumably avoids padding on a path that is mostly
    // entered by falling through -- confirm.
    if (Count < FTCount * 2)
      continue;

    const uint64_t BlockSize =
        BC.computeCodeSize(BB->begin(), BB->end(), Emitter);
    if (opts::AlignBlocksMinSize && BlockSize < opts::AlignBlocksMinSize)
      continue;

    const uint64_t BytesToUse =
        std::min<uint64_t>(opts::BlockAlignment - 1, BlockSize);
    BB->setAlignment(opts::BlockAlignment);
    BB->setAlignmentMaxBytes(BytesToUse);

    // Update stats. The lock guards the histogram and the counter, which are
    // members shared by all invocations of this method.
    LLVM_DEBUG(
      std::unique_lock<std::shared_timed_mutex> Lock(AlignHistogramMtx);
      AlignHistogram[BytesToUse]++;
      AlignedBlocksCount += BB->getKnownExecutionCount();
    );
  }
}
// Entry point of the pass: set function alignment and, when enabled, basic
// block alignment for the functions in BC.
//
// Nothing is done unless BC.HasRelocations is set. Function alignment uses
// alignCompact() or alignMaxBytes() depending on opts::UseCompactAligner.
// Block alignment runs only when opts::AlignBlocks is set and
// opts::PreserveBlocksAlignment is not. In debug mode, the collected
// alignment statistics are printed at the end.
void AlignerPass::runOnFunctions(BinaryContext &BC) {
  if (!BC.HasRelocations)
    return;

  // One histogram slot per possible value of max alignment bytes.
  AlignHistogram.resize(opts::BlockAlignment);

  ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
    // Create a separate MCCodeEmitter to allow lock free execution
    BinaryContext::IndependentCodeEmitter Emitter =
        BC.createIndependentMCCodeEmitter();
    const MCCodeEmitter *MCE = Emitter.MCE.get();

    if (!opts::UseCompactAligner)
      alignMaxBytes(BF);
    else
      alignCompact(BF, MCE);

    const bool ShouldAlignBlocks =
        opts::AlignBlocks && !opts::PreserveBlocksAlignment;
    if (ShouldAlignBlocks)
      alignBlocks(BF, MCE);
  };

  ParallelUtilities::PredicateTy NullPredicate = nullptr;
  ParallelUtilities::runOnEachFunction(
      BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun,
      NullPredicate, "AlignerPass");

  LLVM_DEBUG(
    dbgs() << "BOLT-DEBUG: max bytes per basic block alignment distribution:\n";
    for (unsigned I = 1; I < AlignHistogram.size(); ++I) {
      dbgs() << " " << I << " : " << AlignHistogram[I] << '\n';
    }
    dbgs() << "BOLT-DEBUG: total execution count of aligned blocks: "
           << AlignedBlocksCount << '\n';
  );
}
} // end namespace bolt
} // end namespace llvm