[BOLT] Introduce lite processing mode without relocations

Summary:
When optimizing a binary without relocations, we can skip processing
functions that have no profile (cold functions). Skipping these cold
functions reduces processing time and memory usage. We call this mode
lite mode; it is enabled by default for binaries without relocations.

Some processing is still done for functions without a profile even in
lite mode: scanExternalRefs() scans them to detect secondary entry
points of other functions that are not marked in the symbol table.

Note that the no-relocation requirement is a temporary limitation
of the lite mode.

(cherry picked from FBD21366567)
This commit is contained in:
Maksim Panchenko 2020-05-03 15:49:58 -07:00
parent 04c5d4fcab
commit 924d0bdb08
5 changed files with 227 additions and 81 deletions

View File

@ -906,11 +906,37 @@ ErrorOr<ArrayRef<uint8_t>> BinaryFunction::getData() const {
return ArrayRef<uint8_t>(Bytes + Offset, getMaxSize());
}
/// Return the number of bytes taken by the data-in-code object that begins at
/// \p Offset, or 0 when \p Offset is not a recorded data offset. The object
/// runs up to the first recorded code offset greater than \p Offset; when
/// there is none, it runs up to getSize().
size_t BinaryFunction::getSizeOfDataInCodeAt(uint64_t Offset) const {
  if (Islands.DataOffsets.find(Offset) != Islands.DataOffsets.end()) {
    // Data starts here: it ends where code resumes, or at the function size.
    const auto NextCode = Islands.CodeOffsets.upper_bound(Offset);
    if (NextCode == Islands.CodeOffsets.end())
      return getSize() - Offset;
    return *NextCode - Offset;
  }

  // No data object is recorded at this offset.
  return 0;
}
bool BinaryFunction::isZeroPaddingAt(uint64_t Offset) const {
ArrayRef<uint8_t> FunctionData = *getData();
uint64_t EndOfCode = getSize();
auto Iter = Islands.DataOffsets.upper_bound(Offset);
if (Iter != Islands.DataOffsets.end())
EndOfCode = *Iter;
for (auto I = Offset; I < EndOfCode; ++I) {
if (FunctionData[I] != 0) {
return false;
}
}
return true;
}
void BinaryFunction::disassemble() {
NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs",
"Build Binary Functions", opts::TimeBuild);
ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData();
assert(ErrorOrFunctionData && "Function data is not available");
assert(ErrorOrFunctionData && "function data is not available");
ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData;
assert(FunctionData.size() == getMaxSize() &&
"function size does not match raw data size");
@ -983,13 +1009,9 @@ void BinaryFunction::disassemble() {
const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
// Check for data inside code and ignore it
if (Islands.DataOffsets.find(Offset) != Islands.DataOffsets.end()) {
auto Iter = Islands.CodeOffsets.upper_bound(Offset);
if (Iter != Islands.CodeOffsets.end()) {
Size = *Iter - Offset;
continue;
}
break;
if (const auto DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
Size = DataInCodeSize;
continue;
}
if (!BC.DisAsm->getInstruction(Instruction,
@ -1000,32 +1022,21 @@ void BinaryFunction::disassemble() {
nulls())) {
// Functions with "soft" boundaries, e.g. coming from assembly source,
// can have 0-byte padding at the end.
bool IsZeroPadding = true;
uint64_t EndOfCode = getSize();
auto Iter = Islands.DataOffsets.upper_bound(Offset);
if (Iter != Islands.DataOffsets.end())
EndOfCode = *Iter;
for (auto I = Offset; I < EndOfCode; ++I) {
if (FunctionData[I] != 0) {
IsZeroPadding = false;
break;
}
if (isZeroPaddingAt(Offset))
break;
errs() << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
<< Twine::utohexstr(Offset) << " (address 0x"
<< Twine::utohexstr(AbsoluteInstrAddr) << ") in function "
<< *this << '\n';
// Some AVX-512 instructions could not be disassembled at all.
if (BC.HasRelocations && opts::TrapOnAVX512 && BC.isX86()) {
setTrapOnEntry();
BC.TrappedFunctions.push_back(this);
} else {
IsSimple = false;
}
if (!IsZeroPadding) {
// Ignore this function. Skip to the next one in non-relocs mode.
errs() << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
<< Twine::utohexstr(Offset) << " (address 0x"
<< Twine::utohexstr(AbsoluteInstrAddr) << ") in function "
<< *this << '\n';
// Some AVX-512 instructions could not be disassembled at all.
if (BC.HasRelocations && opts::TrapOnAVX512 && BC.isX86()) {
setTrapOnEntry();
BC.TrappedFunctions.push_back(this);
} else {
IsSimple = false;
}
}
break;
}
@ -1353,6 +1364,72 @@ add_instruction:
updateState(State::Disassembled);
}
void BinaryFunction::scanExternalRefs() {
if (isPLTFunction())
return;
ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData();
assert(ErrorOrFunctionData && "function data is not available");
ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData;
assert(FunctionData.size() == getMaxSize() &&
"function size does not match raw data size");
uint64_t Size = 0; // instruction size
for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
// Check for data inside code and ignore it
if (const auto DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
Size = DataInCodeSize;
continue;
}
const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
MCInst Instruction;
if (!BC.DisAsm->getInstruction(Instruction,
Size,
FunctionData.slice(Offset),
AbsoluteInstrAddr,
nulls(),
nulls())) {
if (opts::Verbosity >= 1 && !isZeroPaddingAt(Offset)) {
errs() << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
<< Twine::utohexstr(Offset) << " (address 0x"
<< Twine::utohexstr(AbsoluteInstrAddr) << ") in function "
<< *this << '\n';
}
break;
}
// Detect address reference by an instruction.
// Without relocations, we can only trust PC-relative address modes.
uint64_t TargetAddress{0};
if (BC.MIB->hasPCRelOperand(Instruction)) {
if (!BC.MIB->evaluateMemOperandTarget(Instruction, TargetAddress,
AbsoluteInstrAddr, Size)) {
continue;
}
} else if (BC.MIB->isCall(Instruction) || BC.MIB->isBranch(Instruction)) {
if (!BC.MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
TargetAddress)) {
continue;
}
} else {
continue;
}
if (containsAddress(TargetAddress, /*UseMaxSize=*/true))
continue;
auto *TargetFunction = BC.getBinaryFunctionContainingAddress(TargetAddress);
if (!TargetFunction)
continue;
const uint64_t TargetOffset = TargetAddress - TargetFunction->getAddress();
if (TargetOffset && TargetFunction->isSimple()) {
TargetFunction->addEntryPointAtOffset(TargetOffset);
}
}
}
void BinaryFunction::postProcessEntryPoints() {
if (!isSimple())
return;

View File

@ -2050,6 +2050,17 @@ public:
/// Returns false if disassembly failed.
void disassemble();
/// Scan function for references to other functions.
///
/// The function bytes are decoded one instruction at a time, skipping
/// data-in-code. When a PC-relative memory operand, call, or branch resolves
/// to a non-zero offset inside another simple function, that offset is added
/// as an entry point of the target function. PLT functions are skipped, and
/// scanning stops at the first instruction that cannot be disassembled.
void scanExternalRefs();
/// Return the size of a data object located at \p Offset in the function.
/// Return 0 if there is no data object at the \p Offset.
///
/// The data object is taken to extend to the next code offset recorded after
/// \p Offset, or to the function size if no code offset follows.
size_t getSizeOfDataInCodeAt(uint64_t Offset) const;
/// Verify that starting at \p Offset function contents are filled with
/// zero-value bytes.
///
/// The check covers bytes from \p Offset up to the next data-in-code offset
/// recorded after \p Offset, or up to the function size if there is none.
/// Return true if all of the checked bytes are zero.
bool isZeroPaddingAt(uint64_t Offset) const;
/// Check that entry points have an associated instruction at their
/// offsets after disassembly.
void postProcessEntryPoints();

View File

@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "BinaryFunction.h"
#include "DataReader.h"
#include "llvm/Support/Debug.h"
#include <map>
@ -793,6 +794,34 @@ fetchMapEntriesRegex(
}
/// Return true if \p Function may have profile data available: it already has
/// branch or memory data attached, one of its names has a branch or memory
/// profile entry, or one of its names yields a value from getLTOCommonName()
/// and the regex-based lookups find at least one entry.
bool DataReader::mayHaveProfileData(const BinaryFunction &Function) {
  // Profile data already attached to the function.
  if (Function.getBranchData() || Function.getMemData())
    return true;

  // Lookup by the function's names.
  const auto &Names = Function.getNames();
  if (getFuncBranchData(Names) || getFuncMemData(Names))
    return true;

  // The regex lookups below are only attempted when at least one name has an
  // LTO common name.
  bool HasVolatileName = false;
  for (const auto Name : Names) {
    if (getLTOCommonName(Name)) {
      HasVolatileName = true;
      break;
    }
  }
  if (!HasVolatileName)
    return false;

  // Branch data is queried first; memory data only if no branch data matched.
  return !getFuncBranchDataRegex(Names).empty() ||
         !getFuncMemDataRegex(Names).empty();
}
FuncBranchData *
DataReader::getFuncBranchData(const std::vector<StringRef> &FuncNames) {
return fetchMapEntry<FuncsToBranchesMapTy>(FuncsToBranches, FuncNames);

View File

@ -32,6 +32,8 @@
namespace llvm {
namespace bolt {
class BinaryFunction;
struct LBREntry {
uint64_t From;
uint64_t To;
@ -358,6 +360,11 @@ public:
///
std::error_code parseInNoLBRMode();
/// Return true if the function \p BF may have a profile available.
/// The result is based on the name(s) of the function alone and the profile
/// match is not guaranteed.
bool mayHaveProfileData(const BinaryFunction &BF);
/// Return branch data matching one of the names in \p FuncNames.
FuncBranchData *
getFuncBranchData(const std::vector<StringRef> &FuncNames);

View File

@ -219,6 +219,13 @@ KeepTmp("keep-tmp",
cl::Hidden,
cl::cat(BoltCategory));
// Lite mode: skip processing of functions without profile (cold functions).
// The initial value here is false, but the effective default is decided in
// adjustCommandLineOptions(): if the option was not given on the command line
// and the binary has no relocations, it is switched on; if it is on for a
// binary with relocations, a warning is printed and it is switched off.
static cl::opt<bool>
Lite("lite",
cl::desc("skip processing of cold functions"),
cl::init(false),
cl::ZeroOrMore,
cl::cat(BoltCategory));
static cl::opt<unsigned>
MaxFunctions("max-funcs",
cl::desc("maximum number of functions to process"),
@ -456,7 +463,7 @@ RewriteInstance::~RewriteInstance() {}
bool RewriteInstance::shouldDisassemble(const BinaryFunction &BF) const {
// If we have to relocate the code we have to disassemble all functions.
if (!BF.getBinaryContext().HasRelocations && BF.isIgnored()) {
if (!BC->HasRelocations && BF.isIgnored()) {
return false;
}
@ -741,14 +748,11 @@ void RewriteInstance::run() {
discoverFileObjects();
std::thread PreProcessProfileThread([&]() {
if (!DA.started())
return;
outs() << "BOLT-INFO: spawning thread to pre-process profile\n";
preprocessProfileData();
});
if (opts::NoThreads)
if (opts::NoThreads || opts::Lite)
PreProcessProfileThread.join();
selectFunctionsToProcess();
@ -1683,6 +1687,14 @@ void RewriteInstance::adjustCommandLineOptions() {
if (!opts::AlignText.getNumOccurrences()) {
opts::AlignText = BC->PageAlign;
}
if (!BC->HasRelocations && opts::Lite.getNumOccurrences() == 0) {
opts::Lite = true;
} else if (BC->HasRelocations && opts::Lite) {
errs() << "BOLT-WARNING: lite mode currently does not work with "
"relocations\n";
opts::Lite = false;
}
}
namespace {
@ -2271,6 +2283,13 @@ void RewriteInstance::selectFunctionsToProcess() {
}
}
if (opts::Lite) {
if (!BC->DR.getAllFuncsData().empty() &&
!BC->DR.mayHaveProfileData(Function)) {
return false;
}
}
return true;
};
@ -2303,61 +2322,62 @@ void RewriteInstance::readDebugInfo() {
void RewriteInstance::preprocessProfileData() {
NamedRegionTimer T("preprocessprofile", "pre-process profile data",
TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
if (BAT->enabledFor(InputFile)) {
outs() << "BOLT-INFO: profile collection done on a binary already "
"processed by BOLT\n";
DA.setBAT(&*BAT);
if (DA.started()) {
if (BAT->enabledFor(InputFile)) {
outs() << "BOLT-INFO: profile collection done on a binary already "
"processed by BOLT\n";
DA.setBAT(&*BAT);
}
DA.parseProfile(*BC.get());
return;
}
// Preliminary match profile data to functions.
if (!BC->DR.getAllFuncsData().empty()) {
if (BC->DR.collectedInBoltedBinary()) {
outs() << "BOLT-INFO: profile collection done on a binary already "
"processed by BOLT\n";
}
for (auto &BFI : BC->getBinaryFunctions()) {
auto &Function = BFI.second;
if (auto *MemData = BC->DR.getFuncMemData(Function.getNames())) {
Function.MemData = MemData;
MemData->Used = true;
}
if (auto *FuncData = BC->DR.getFuncBranchData(Function.getNames())) {
Function.BranchData = FuncData;
Function.ExecutionCount = FuncData->ExecutionCount;
FuncData->Used = true;
}
}
}
DA.parseProfile(*BC.get());
}
void RewriteInstance::processProfileData() {
NamedRegionTimer T("processprofile", "process profile data", TimerGroupName,
TimerGroupDesc, opts::TimeRewrite);
auto &BinaryFunctions = BC->getBinaryFunctions();
if (!opts::BoltProfile.empty()) {
ProfileReader PR;
auto EC = PR.readProfile(opts::BoltProfile, BC->getBinaryFunctions());
check_error(EC, "cannot read profile");
return;
}
if (DA.started()) {
DA.processProfile(*BC.get());
for (auto &BFI : BinaryFunctions) {
for (auto &BFI : BC->getBinaryFunctions()) {
auto &Function = BFI.second;
Function.convertBranchData();
}
if (opts::AggregateOnly) {
if (std::error_code EC = DA.writeAggregatedFile()) {
check_error(EC, "cannot create output data file");
}
}
} else {
if (!opts::BoltProfile.empty()) {
ProfileReader PR;
auto EC = PR.readProfile(opts::BoltProfile, BinaryFunctions);
check_error(EC, "cannot read profile");
return;
}
// Preliminary match profile data to functions.
if (!BC->DR.getAllFuncsData().empty()) {
if (BC->DR.collectedInBoltedBinary()) {
outs() << "BOLT-INFO: profile collection done on a binary already "
"processed by BOLT\n";
}
for (auto &BFI : BinaryFunctions) {
auto &Function = BFI.second;
if (auto *MemData = BC->DR.getFuncMemData(Function.getNames())) {
Function.MemData = MemData;
MemData->Used = true;
}
if (auto *FuncData = BC->DR.getFuncBranchData(Function.getNames())) {
Function.BranchData = FuncData;
Function.ExecutionCount = FuncData->ExecutionCount;
FuncData->Used = true;
}
}
}
for (auto &BFI : BinaryFunctions) {
for (auto &BFI : BC->getBinaryFunctions()) {
auto &Function = BFI.second;
Function.readProfile();
}
@ -2375,17 +2395,11 @@ void RewriteInstance::disassembleFunctions() {
for (auto &BFI : BC->getBinaryFunctions()) {
BinaryFunction &Function = BFI.second;
if (!shouldDisassemble(Function)) {
Function.setSimple(false);
continue;
}
auto FunctionData = Function.getData();
if (!FunctionData) {
// When could it happen?
errs() << "BOLT-ERROR: corresponding section is non-executable or "
<< "empty for function " << Function << '\n';
continue;
exit(1);
}
// Treat zero-sized functions as non-simple ones.
@ -2399,6 +2413,14 @@ void RewriteInstance::disassembleFunctions() {
reinterpret_cast<const uint8_t*>(InputFile->getData().data());
Function.setFileOffset(FunctionData->begin() - FileBegin);
if (!shouldDisassemble(Function)) {
NamedRegionTimer T("scan", "scan functions", "buildfuncs",
"Scan Binary Functions", opts::TimeBuild);
Function.scanExternalRefs();
Function.setSimple(false);
continue;
}
Function.disassemble();
if (!Function.isSimple() && BC->HasRelocations) {