forked from OSchip/llvm-project
[BOLT] Custom function alignment
Summary: A new 'compact' function aligner that takes function sizes into consideration. The approach is based on the following assumptions: -- It is not desirable to introduce a large offset when aligning short functions, as it leads to a lot of "wasted" address space. -- For longer functions, the offset can be larger than the default 32 bytes; however, using 64 bytes for the offset still worsens performance, as again a lot of address space is wasted. -- Cold parts of functions can still use the default max-32 offset. The algorithm is switched on/off by the flag 'use-compact-aligner' and is controlled by the parameters align-functions-max-bytes and align-cold-functions-max-bytes described above. In my tests the best performance is produced with '-use-compact-aligner=true -align-functions-max-bytes=48 -align-cold-functions-max-bytes=32'. (cherry picked from FBD6194092)
This commit is contained in:
parent
dd6ecdd782
commit
a0c041f72a
|
@ -241,7 +241,13 @@ private:
|
|||
uint64_t MaxSize{std::numeric_limits<uint64_t>::max()};
|
||||
|
||||
/// Alignment requirements for the function.
|
||||
uint64_t Alignment{2};
|
||||
uint16_t Alignment{2};
|
||||
|
||||
/// Maximum number of bytes used for alignment of hot part of the function.
|
||||
uint16_t MaxAlignmentBytes{0};
|
||||
|
||||
/// Maximum number of bytes used for alignment of cold part of the function.
|
||||
uint16_t MaxColdAlignmentBytes{0};
|
||||
|
||||
const MCSymbol *PersonalityFunction{nullptr};
|
||||
uint8_t PersonalityEncoding{dwarf::DW_EH_PE_sdata4 | dwarf::DW_EH_PE_pcrel};
|
||||
|
@ -1580,15 +1586,33 @@ public:
|
|||
return *this;
|
||||
}
|
||||
|
||||
BinaryFunction &setAlignment(uint64_t Align) {
|
||||
BinaryFunction &setAlignment(uint16_t Align) {
|
||||
Alignment = Align;
|
||||
return *this;
|
||||
}
|
||||
|
||||
uint64_t getAlignment() const {
|
||||
uint16_t getAlignment() const {
|
||||
return Alignment;
|
||||
}
|
||||
|
||||
BinaryFunction &setMaxAlignmentBytes(uint16_t MaxAlignBytes) {
|
||||
MaxAlignmentBytes = MaxAlignBytes;
|
||||
return *this;
|
||||
}
|
||||
|
||||
uint16_t getMaxAlignmentBytes() const {
|
||||
return MaxAlignmentBytes;
|
||||
}
|
||||
|
||||
BinaryFunction &setMaxColdAlignmentBytes(uint16_t MaxAlignBytes) {
|
||||
MaxColdAlignmentBytes = MaxAlignBytes;
|
||||
return *this;
|
||||
}
|
||||
|
||||
uint16_t getMaxColdAlignmentBytes() const {
|
||||
return MaxColdAlignmentBytes;
|
||||
}
|
||||
|
||||
BinaryFunction &setImageAddress(uint64_t Address) {
|
||||
ImageAddress = Address;
|
||||
return *this;
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "BinaryPassManager.h"
|
||||
#include "Passes/Aligner.h"
|
||||
#include "Passes/AllocCombiner.h"
|
||||
#include "Passes/FrameOptimizer.h"
|
||||
#include "Passes/IndirectCallPromotion.h"
|
||||
|
@ -393,6 +394,8 @@ void BinaryFunctionPassManager::runAllPasses(
|
|||
llvm::make_unique<SimplifyConditionalTailCalls>(PrintSCTC),
|
||||
opts::SimplifyConditionalTailCalls);
|
||||
|
||||
Manager.registerPass(llvm::make_unique<AlignerPass>());
|
||||
|
||||
// This pass should always run last.*
|
||||
Manager.registerPass(llvm::make_unique<FinalizeFunctions>(PrintFinalized));
|
||||
|
||||
|
|
|
@ -0,0 +1,101 @@
|
|||
//===--- Aligner.cpp ------------------------------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "Aligner.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace opts {
|
||||
extern cl::OptionCategory BoltOptCategory;
|
||||
|
||||
extern cl::opt<bool> Relocs;
|
||||
|
||||
cl::opt<bool>
|
||||
UseCompactAligner("use-compact-aligner",
|
||||
cl::desc("Use compact approach for aligning functions"),
|
||||
cl::init(false),
|
||||
cl::ZeroOrMore,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
cl::opt<unsigned>
|
||||
AlignFunctions("align-functions",
|
||||
cl::desc("align functions at a given value (relocation mode)"),
|
||||
cl::init(64),
|
||||
cl::ZeroOrMore,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
cl::opt<unsigned>
|
||||
AlignFunctionsMaxBytes("align-functions-max-bytes",
|
||||
cl::desc("maximum number of bytes to use to align functions"),
|
||||
cl::init(32),
|
||||
cl::ZeroOrMore,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
} // end namespace opts
|
||||
|
||||
namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
namespace {
|
||||
|
||||
// Align function to the specified byte-boundary (typically, 64) offsetting
|
||||
// the fuction by not more than the corresponding value
|
||||
void alignMaxBytes(BinaryFunction &Function) {
|
||||
Function.setAlignment(opts::AlignFunctions);
|
||||
Function.setMaxAlignmentBytes(opts::AlignFunctionsMaxBytes);
|
||||
Function.setMaxColdAlignmentBytes(opts::AlignFunctionsMaxBytes);
|
||||
}
|
||||
|
||||
// Align function to the specified byte-boundary (typically, 64) offsetting
|
||||
// the fuction by not more than the minimum over
|
||||
// -- the size of the function
|
||||
// -- the specified number of bytes
|
||||
void alignCompact(BinaryContext &BC, BinaryFunction &Function) {
|
||||
size_t HotSize = 0;
|
||||
size_t ColdSize = 0;
|
||||
for (const auto *BB : Function.layout()) {
|
||||
if (BB->isCold())
|
||||
ColdSize += BC.computeCodeSize(BB->begin(), BB->end());
|
||||
else
|
||||
HotSize += BC.computeCodeSize(BB->begin(), BB->end());
|
||||
}
|
||||
|
||||
Function.setAlignment(opts::AlignFunctions);
|
||||
if (HotSize > 0)
|
||||
Function.setMaxAlignmentBytes(
|
||||
std::min(size_t(opts::AlignFunctionsMaxBytes), HotSize));
|
||||
|
||||
// using the same option, max-align-bytes, both for cold and hot parts of the
|
||||
// functions, as aligning cold functions typically does not affect performance
|
||||
if (ColdSize > 0)
|
||||
Function.setMaxColdAlignmentBytes(
|
||||
std::min(size_t(opts::AlignFunctionsMaxBytes), ColdSize));
|
||||
}
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
/// Assign alignment parameters to every function in \p BFs.
///
/// Does nothing unless opts::Relocs is set. Otherwise each function is handed
/// to alignCompact() when opts::UseCompactAligner is on, and to
/// alignMaxBytes() when it is off. \p LargeFunctions is not used here.
void AlignerPass::runOnFunctions(BinaryContext &BC,
                                 std::map<uint64_t, BinaryFunction> &BFs,
                                 std::set<uint64_t> &LargeFunctions) {
  if (!opts::Relocs)
    return;

  for (auto &Entry : BFs) {
    auto &BF = Entry.second;

    if (!opts::UseCompactAligner) {
      alignMaxBytes(BF);
      continue;
    }

    alignCompact(BC, BF);
  }
}
|
||||
|
||||
} // end namespace bolt
|
||||
} // end namespace llvm
|
|
@ -0,0 +1,38 @@
|
|||
//===--------- Passes/Aligner.h -------------------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_ALIGNER_H
|
||||
#define LLVM_TOOLS_LLVM_BOLT_PASSES_ALIGNER_H
|
||||
|
||||
#include "BinaryPasses.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
class AlignerPass : public BinaryFunctionPass {
|
||||
public:
|
||||
explicit AlignerPass() : BinaryFunctionPass(false) {}
|
||||
|
||||
const char *getName() const override {
|
||||
return "aligner";
|
||||
}
|
||||
|
||||
/// Pass entry point
|
||||
void runOnFunctions(BinaryContext &BC,
|
||||
std::map<uint64_t, BinaryFunction> &BFs,
|
||||
std::set<uint64_t> &LargeFunctions) override;
|
||||
};
|
||||
|
||||
} // namespace bolt
|
||||
} // namespace llvm
|
||||
|
||||
|
||||
#endif
|
|
@ -1,4 +1,5 @@
|
|||
add_llvm_library(LLVMBOLTPasses
|
||||
Aligner.cpp
|
||||
AllocCombiner.cpp
|
||||
BinaryPasses.cpp
|
||||
BinaryFunctionCallGraph.cpp
|
||||
|
|
|
@ -90,20 +90,6 @@ OutputFilename("o",
|
|||
cl::Required,
|
||||
cl::cat(BoltOutputCategory));
|
||||
|
||||
cl::opt<unsigned>
|
||||
AlignFunctions("align-functions",
|
||||
cl::desc("align functions at a given value (relocation mode)"),
|
||||
cl::init(64),
|
||||
cl::ZeroOrMore,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
cl::opt<unsigned>
|
||||
AlignFunctionsMaxBytes("align-functions-max-bytes",
|
||||
cl::desc("maximum number of bytes to use to align functions"),
|
||||
cl::init(32),
|
||||
cl::ZeroOrMore,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
cl::opt<bool>
|
||||
AllowStripped("allow-stripped",
|
||||
cl::desc("allow processing of stripped binaries"),
|
||||
|
@ -2190,8 +2176,11 @@ void RewriteInstance::emitFunction(MCStreamer &Streamer, BinaryFunction &Functio
|
|||
|
||||
if (opts::Relocs) {
|
||||
Streamer.EmitCodeAlignment(BinaryFunction::MinAlign);
|
||||
Streamer.EmitCodeAlignment(opts::AlignFunctions,
|
||||
opts::AlignFunctionsMaxBytes);
|
||||
auto MaxAlignBytes = EmitColdPart
|
||||
? Function.getMaxColdAlignmentBytes()
|
||||
: Function.getMaxAlignmentBytes();
|
||||
if (MaxAlignBytes > 0)
|
||||
Streamer.EmitCodeAlignment(Function.getAlignment(), MaxAlignBytes);
|
||||
} else {
|
||||
Streamer.EmitCodeAlignment(Function.getAlignment());
|
||||
Streamer.setCodeSkew(EmitColdPart ? 0 : Function.getAddress());
|
||||
|
|
Loading…
Reference in New Issue