[BOLT] Custom function alignment

Summary:
A new 'compact' function aligner that takes function sizes into consideration. The approach is based on the following assumptions:
-- It is not desirable to introduce a large offset when aligning short functions, as it leads to a lot of "wasted" address space.
-- For longer functions, the offset can be larger than the default 32 bytes; however, using 64 bytes for the offset still worsens performance, as again a lot of address space is wasted.
-- Cold parts of functions can still use the default max-32 offset.

The algorithm is switched on/off by flag 'use-compact-aligner' and is controlled by parameters align-functions-max-bytes and align-cold-functions-max-bytes described above. In my tests the best performance is produced with '-use-compact-aligner=true -align-functions-max-bytes=48 -align-cold-functions-max-bytes=32'.

(cherry picked from FBD6194092)
This commit is contained in:
spupyrev 2017-10-27 15:05:31 -07:00 committed by Maksim Panchenko
parent dd6ecdd782
commit a0c041f72a
6 changed files with 175 additions and 19 deletions

View File

@ -241,7 +241,13 @@ private:
uint64_t MaxSize{std::numeric_limits<uint64_t>::max()};
/// Alignment requirements for the function.
uint64_t Alignment{2};
uint16_t Alignment{2};
/// Maximum number of bytes used for alignment of hot part of the function.
uint16_t MaxAlignmentBytes{0};
/// Maximum number of bytes used for alignment of cold part of the function.
uint16_t MaxColdAlignmentBytes{0};
const MCSymbol *PersonalityFunction{nullptr};
uint8_t PersonalityEncoding{dwarf::DW_EH_PE_sdata4 | dwarf::DW_EH_PE_pcrel};
@ -1580,15 +1586,33 @@ public:
return *this;
}
BinaryFunction &setAlignment(uint64_t Align) {
BinaryFunction &setAlignment(uint16_t Align) {
Alignment = Align;
return *this;
}
uint64_t getAlignment() const {
uint16_t getAlignment() const {
return Alignment;
}
/// Set the maximum number of bytes that may be used to align the hot part of
/// the function. Returns a reference to this function.
BinaryFunction &setMaxAlignmentBytes(uint16_t MaxAlignBytes) {
MaxAlignmentBytes = MaxAlignBytes;
return *this;
}
/// Return the maximum number of bytes that may be used to align the hot part
/// of the function.
uint16_t getMaxAlignmentBytes() const {
return MaxAlignmentBytes;
}
/// Set the maximum number of bytes that may be used to align the cold part of
/// the function. Returns a reference to this function.
BinaryFunction &setMaxColdAlignmentBytes(uint16_t MaxAlignBytes) {
MaxColdAlignmentBytes = MaxAlignBytes;
return *this;
}
/// Return the maximum number of bytes that may be used to align the cold part
/// of the function.
uint16_t getMaxColdAlignmentBytes() const {
return MaxColdAlignmentBytes;
}
BinaryFunction &setImageAddress(uint64_t Address) {
ImageAddress = Address;
return *this;

View File

@ -10,6 +10,7 @@
//===----------------------------------------------------------------------===//
#include "BinaryPassManager.h"
#include "Passes/Aligner.h"
#include "Passes/AllocCombiner.h"
#include "Passes/FrameOptimizer.h"
#include "Passes/IndirectCallPromotion.h"
@ -393,6 +394,8 @@ void BinaryFunctionPassManager::runAllPasses(
llvm::make_unique<SimplifyConditionalTailCalls>(PrintSCTC),
opts::SimplifyConditionalTailCalls);
Manager.registerPass(llvm::make_unique<AlignerPass>());
// This pass should always run last.*
Manager.registerPass(llvm::make_unique<FinalizeFunctions>(PrintFinalized));

101
bolt/Passes/Aligner.cpp Normal file
View File

@ -0,0 +1,101 @@
//===--- Aligner.cpp ------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#include "Aligner.h"
using namespace llvm;
namespace opts {
// Declared here, defined in another translation unit.
extern cl::OptionCategory BoltOptCategory;
// Gates AlignerPass::runOnFunctions(): the pass returns immediately when this
// flag is off.
extern cl::opt<bool> Relocs;
// -use-compact-aligner: when set, AlignerPass uses alignCompact() instead of
// alignMaxBytes(). Off by default.
cl::opt<bool>
UseCompactAligner("use-compact-aligner",
cl::desc("Use compact approach for aligning functions"),
cl::init(false),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
// -align-functions: value handed to BinaryFunction::setAlignment() by both
// alignment policies. Defaults to 64.
cl::opt<unsigned>
AlignFunctions("align-functions",
cl::desc("align functions at a given value (relocation mode)"),
cl::init(64),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
// -align-functions-max-bytes: upper bound handed to setMaxAlignmentBytes() and
// setMaxColdAlignmentBytes(); the same option is used for hot and cold parts.
// Defaults to 32.
cl::opt<unsigned>
AlignFunctionsMaxBytes("align-functions-max-bytes",
cl::desc("maximum number of bytes to use to align functions"),
cl::init(32),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
} // end namespace opts
namespace llvm {
namespace bolt {
namespace {
// Size-independent policy: request the -align-functions boundary and allow up
// to -align-functions-max-bytes of padding for both the hot and the cold part
// of the function.
void alignMaxBytes(BinaryFunction &Function) {
  const unsigned PaddingCap = opts::AlignFunctionsMaxBytes;
  Function.setAlignment(opts::AlignFunctions)
      .setMaxAlignmentBytes(PaddingCap)
      .setMaxColdAlignmentBytes(PaddingCap);
}
// Size-aware policy: request the -align-functions boundary, but cap the
// padding of each part (hot / cold) at the smaller of
//   -- the code size of that part, as reported by BC.computeCodeSize()
//   -- -align-functions-max-bytes
// A part whose computed size is zero does not get its cap updated.
void alignCompact(BinaryContext &BC, BinaryFunction &Function) {
  // Sum the code size of hot and cold basic blocks separately.
  size_t HotBytes = 0;
  size_t ColdBytes = 0;
  for (const auto *Block : Function.layout()) {
    size_t &Bucket = Block->isCold() ? ColdBytes : HotBytes;
    Bucket += BC.computeCodeSize(Block->begin(), Block->end());
  }

  const size_t PaddingCap = size_t(opts::AlignFunctionsMaxBytes);
  Function.setAlignment(opts::AlignFunctions);

  if (HotBytes != 0)
    Function.setMaxAlignmentBytes(std::min(PaddingCap, HotBytes));

  // The cold part intentionally shares the max-align-bytes option with the hot
  // part, as aligning cold functions typically does not affect performance.
  if (ColdBytes != 0)
    Function.setMaxColdAlignmentBytes(std::min(PaddingCap, ColdBytes));
}
} // end anonymous namespace
// Pass entry point. Returns without touching anything when opts::Relocs is
// off; otherwise sets alignment parameters on every function in BFs, using
// alignCompact() if -use-compact-aligner is set and alignMaxBytes() if not.
// LargeFunctions is not used by this pass.
void AlignerPass::runOnFunctions(BinaryContext &BC,
                                 std::map<uint64_t, BinaryFunction> &BFs,
                                 std::set<uint64_t> &LargeFunctions) {
  if (!opts::Relocs)
    return;

  const bool Compact = opts::UseCompactAligner;
  for (auto &Entry : BFs) {
    BinaryFunction &BF = Entry.second;
    if (Compact) {
      alignCompact(BC, BF);
    } else {
      alignMaxBytes(BF);
    }
  }
}
} // end namespace bolt
} // end namespace llvm

38
bolt/Passes/Aligner.h Normal file
View File

@ -0,0 +1,38 @@
//===--------- Passes/Aligner.h -------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_ALIGNER_H
#define LLVM_TOOLS_LLVM_BOLT_PASSES_ALIGNER_H
#include "BinaryPasses.h"
namespace llvm {
namespace bolt {
/// Binary function pass that assigns alignment parameters to functions; the
/// work is done in runOnFunctions().
class AlignerPass : public BinaryFunctionPass {
public:
  /// Construct the pass, forwarding `false` to the BinaryFunctionPass base.
  explicit AlignerPass() : BinaryFunctionPass(false) {}

  /// Name under which this pass identifies itself.
  const char *getName() const override { return "aligner"; }

  /// Pass entry point
  void runOnFunctions(BinaryContext &BC,
                      std::map<uint64_t, BinaryFunction> &BFs,
                      std::set<uint64_t> &LargeFunctions) override;
};
} // namespace bolt
} // namespace llvm
#endif

View File

@ -1,4 +1,5 @@
add_llvm_library(LLVMBOLTPasses
Aligner.cpp
AllocCombiner.cpp
BinaryPasses.cpp
BinaryFunctionCallGraph.cpp

View File

@ -90,20 +90,6 @@ OutputFilename("o",
cl::Required,
cl::cat(BoltOutputCategory));
cl::opt<unsigned>
AlignFunctions("align-functions",
cl::desc("align functions at a given value (relocation mode)"),
cl::init(64),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
cl::opt<unsigned>
AlignFunctionsMaxBytes("align-functions-max-bytes",
cl::desc("maximum number of bytes to use to align functions"),
cl::init(32),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
cl::opt<bool>
AllowStripped("allow-stripped",
cl::desc("allow processing of stripped binaries"),
@ -2190,8 +2176,11 @@ void RewriteInstance::emitFunction(MCStreamer &Streamer, BinaryFunction &Functio
if (opts::Relocs) {
Streamer.EmitCodeAlignment(BinaryFunction::MinAlign);
Streamer.EmitCodeAlignment(opts::AlignFunctions,
opts::AlignFunctionsMaxBytes);
auto MaxAlignBytes = EmitColdPart
? Function.getMaxColdAlignmentBytes()
: Function.getMaxAlignmentBytes();
if (MaxAlignBytes > 0)
Streamer.EmitCodeAlignment(Function.getAlignment(), MaxAlignBytes);
} else {
Streamer.EmitCodeAlignment(Function.getAlignment());
Streamer.setCodeSkew(EmitColdPart ? 0 : Function.getAddress());