forked from OSchip/llvm-project
[BOLT] Introduce lite processing mode without relocations
Summary: When optimizing a binary without relocations, we can skip processing functions without profile (cold functions). By skipping the processing of cold functions, we reduce processing time and memory usage. We call this mode lite mode, and it is enabled by default when the binary has no relocations. Some processing is still done for functions without profile even in lite mode: the scanExternalRefs() function is used to detect secondary entry points into functions that are not marked in the symbol table. Note that the no-relocation requirement is a temporary limitation of lite mode. (cherry picked from FBD21366567)
This commit is contained in:
parent
04c5d4fcab
commit
924d0bdb08
|
@ -906,11 +906,37 @@ ErrorOr<ArrayRef<uint8_t>> BinaryFunction::getData() const {
|
|||
return ArrayRef<uint8_t>(Bytes + Offset, getMaxSize());
|
||||
}
|
||||
|
||||
size_t BinaryFunction::getSizeOfDataInCodeAt(uint64_t Offset) const {
|
||||
if (Islands.DataOffsets.find(Offset) == Islands.DataOffsets.end())
|
||||
return 0;
|
||||
|
||||
auto Iter = Islands.CodeOffsets.upper_bound(Offset);
|
||||
if (Iter != Islands.CodeOffsets.end()) {
|
||||
return *Iter - Offset;
|
||||
}
|
||||
return getSize() - Offset;
|
||||
}
|
||||
|
||||
bool BinaryFunction::isZeroPaddingAt(uint64_t Offset) const {
|
||||
ArrayRef<uint8_t> FunctionData = *getData();
|
||||
uint64_t EndOfCode = getSize();
|
||||
auto Iter = Islands.DataOffsets.upper_bound(Offset);
|
||||
if (Iter != Islands.DataOffsets.end())
|
||||
EndOfCode = *Iter;
|
||||
for (auto I = Offset; I < EndOfCode; ++I) {
|
||||
if (FunctionData[I] != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void BinaryFunction::disassemble() {
|
||||
NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs",
|
||||
"Build Binary Functions", opts::TimeBuild);
|
||||
ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData();
|
||||
assert(ErrorOrFunctionData && "Function data is not available");
|
||||
assert(ErrorOrFunctionData && "function data is not available");
|
||||
ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData;
|
||||
assert(FunctionData.size() == getMaxSize() &&
|
||||
"function size does not match raw data size");
|
||||
|
@ -983,13 +1009,9 @@ void BinaryFunction::disassemble() {
|
|||
const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
|
||||
|
||||
// Check for data inside code and ignore it
|
||||
if (Islands.DataOffsets.find(Offset) != Islands.DataOffsets.end()) {
|
||||
auto Iter = Islands.CodeOffsets.upper_bound(Offset);
|
||||
if (Iter != Islands.CodeOffsets.end()) {
|
||||
Size = *Iter - Offset;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
if (const auto DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
|
||||
Size = DataInCodeSize;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!BC.DisAsm->getInstruction(Instruction,
|
||||
|
@ -1000,32 +1022,21 @@ void BinaryFunction::disassemble() {
|
|||
nulls())) {
|
||||
// Functions with "soft" boundaries, e.g. coming from assembly source,
|
||||
// can have 0-byte padding at the end.
|
||||
bool IsZeroPadding = true;
|
||||
uint64_t EndOfCode = getSize();
|
||||
auto Iter = Islands.DataOffsets.upper_bound(Offset);
|
||||
if (Iter != Islands.DataOffsets.end())
|
||||
EndOfCode = *Iter;
|
||||
for (auto I = Offset; I < EndOfCode; ++I) {
|
||||
if (FunctionData[I] != 0) {
|
||||
IsZeroPadding = false;
|
||||
break;
|
||||
}
|
||||
if (isZeroPaddingAt(Offset))
|
||||
break;
|
||||
|
||||
errs() << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
|
||||
<< Twine::utohexstr(Offset) << " (address 0x"
|
||||
<< Twine::utohexstr(AbsoluteInstrAddr) << ") in function "
|
||||
<< *this << '\n';
|
||||
// Some AVX-512 instructions could not be disassembled at all.
|
||||
if (BC.HasRelocations && opts::TrapOnAVX512 && BC.isX86()) {
|
||||
setTrapOnEntry();
|
||||
BC.TrappedFunctions.push_back(this);
|
||||
} else {
|
||||
IsSimple = false;
|
||||
}
|
||||
|
||||
if (!IsZeroPadding) {
|
||||
// Ignore this function. Skip to the next one in non-relocs mode.
|
||||
errs() << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
|
||||
<< Twine::utohexstr(Offset) << " (address 0x"
|
||||
<< Twine::utohexstr(AbsoluteInstrAddr) << ") in function "
|
||||
<< *this << '\n';
|
||||
// Some AVX-512 instructions could not be disassembled at all.
|
||||
if (BC.HasRelocations && opts::TrapOnAVX512 && BC.isX86()) {
|
||||
setTrapOnEntry();
|
||||
BC.TrappedFunctions.push_back(this);
|
||||
} else {
|
||||
IsSimple = false;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1353,6 +1364,72 @@ add_instruction:
|
|||
updateState(State::Disassembled);
|
||||
}
|
||||
|
||||
void BinaryFunction::scanExternalRefs() {
|
||||
if (isPLTFunction())
|
||||
return;
|
||||
|
||||
ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData();
|
||||
assert(ErrorOrFunctionData && "function data is not available");
|
||||
ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData;
|
||||
assert(FunctionData.size() == getMaxSize() &&
|
||||
"function size does not match raw data size");
|
||||
|
||||
uint64_t Size = 0; // instruction size
|
||||
for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
|
||||
// Check for data inside code and ignore it
|
||||
if (const auto DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
|
||||
Size = DataInCodeSize;
|
||||
continue;
|
||||
}
|
||||
|
||||
const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
|
||||
MCInst Instruction;
|
||||
if (!BC.DisAsm->getInstruction(Instruction,
|
||||
Size,
|
||||
FunctionData.slice(Offset),
|
||||
AbsoluteInstrAddr,
|
||||
nulls(),
|
||||
nulls())) {
|
||||
if (opts::Verbosity >= 1 && !isZeroPaddingAt(Offset)) {
|
||||
errs() << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
|
||||
<< Twine::utohexstr(Offset) << " (address 0x"
|
||||
<< Twine::utohexstr(AbsoluteInstrAddr) << ") in function "
|
||||
<< *this << '\n';
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Detect address reference by an instruction.
|
||||
// Without relocations, we can only trust PC-relative address modes.
|
||||
uint64_t TargetAddress{0};
|
||||
if (BC.MIB->hasPCRelOperand(Instruction)) {
|
||||
if (!BC.MIB->evaluateMemOperandTarget(Instruction, TargetAddress,
|
||||
AbsoluteInstrAddr, Size)) {
|
||||
continue;
|
||||
}
|
||||
} else if (BC.MIB->isCall(Instruction) || BC.MIB->isBranch(Instruction)) {
|
||||
if (!BC.MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
|
||||
TargetAddress)) {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (containsAddress(TargetAddress, /*UseMaxSize=*/true))
|
||||
continue;
|
||||
|
||||
auto *TargetFunction = BC.getBinaryFunctionContainingAddress(TargetAddress);
|
||||
if (!TargetFunction)
|
||||
continue;
|
||||
|
||||
const uint64_t TargetOffset = TargetAddress - TargetFunction->getAddress();
|
||||
if (TargetOffset && TargetFunction->isSimple()) {
|
||||
TargetFunction->addEntryPointAtOffset(TargetOffset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BinaryFunction::postProcessEntryPoints() {
|
||||
if (!isSimple())
|
||||
return;
|
||||
|
|
|
@ -2050,6 +2050,17 @@ public:
|
|||
/// Returns false if disassembly failed.
|
||||
void disassemble();
|
||||
|
||||
/// Scan function for references to other functions.
|
||||
void scanExternalRefs();
|
||||
|
||||
/// Return the size of a data object located at \p Offset in the function.
|
||||
/// Return 0 if there is no data object at the \p Offset.
|
||||
size_t getSizeOfDataInCodeAt(uint64_t Offset) const;
|
||||
|
||||
/// Verify that starting at \p Offset function contents are filled with
|
||||
/// zero-value bytes.
|
||||
bool isZeroPaddingAt(uint64_t Offset) const;
|
||||
|
||||
/// Check that entry points have an associated instruction at their
|
||||
/// offsets after disassembly.
|
||||
void postProcessEntryPoints();
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
#include "BinaryFunction.h"
|
||||
#include "DataReader.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include <map>
|
||||
|
@ -793,6 +794,34 @@ fetchMapEntriesRegex(
|
|||
|
||||
}
|
||||
|
||||
bool DataReader::mayHaveProfileData(const BinaryFunction &Function) {
|
||||
if (Function.getBranchData() || Function.getMemData())
|
||||
return true;
|
||||
|
||||
if (getFuncBranchData(Function.getNames()) || getFuncMemData(Function.getNames()))
|
||||
return true;
|
||||
|
||||
const auto HasVolatileName = [&Function]() {
|
||||
for (const auto Name : Function.getNames()) {
|
||||
if (getLTOCommonName(Name))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}();
|
||||
if (!HasVolatileName)
|
||||
return false;
|
||||
|
||||
const auto AllBranchData = getFuncBranchDataRegex(Function.getNames());
|
||||
if (!AllBranchData.empty())
|
||||
return true;
|
||||
|
||||
const auto AllMemData = getFuncMemDataRegex(Function.getNames());
|
||||
if (!AllMemData.empty())
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
FuncBranchData *
|
||||
DataReader::getFuncBranchData(const std::vector<StringRef> &FuncNames) {
|
||||
return fetchMapEntry<FuncsToBranchesMapTy>(FuncsToBranches, FuncNames);
|
||||
|
|
|
@ -32,6 +32,8 @@
|
|||
namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
class BinaryFunction;
|
||||
|
||||
struct LBREntry {
|
||||
uint64_t From;
|
||||
uint64_t To;
|
||||
|
@ -358,6 +360,11 @@ public:
|
|||
///
|
||||
std::error_code parseInNoLBRMode();
|
||||
|
||||
/// Return true if the function \p BF may have a profile available.
|
||||
/// The result is based on the name(s) of the function alone and the profile
|
||||
/// match is not guaranteed.
|
||||
bool mayHaveProfileData(const BinaryFunction &BF);
|
||||
|
||||
/// Return branch data matching one of the names in \p FuncNames.
|
||||
FuncBranchData *
|
||||
getFuncBranchData(const std::vector<StringRef> &FuncNames);
|
||||
|
|
|
@ -219,6 +219,13 @@ KeepTmp("keep-tmp",
|
|||
cl::Hidden,
|
||||
cl::cat(BoltCategory));
|
||||
|
||||
// Lite mode switch. The declared default is false; adjustCommandLineOptions()
// turns it on when the binary has no relocations and the option was not given
// on the command line, and forces it off (with a warning) when relocations
// are present.
static cl::opt<bool>
|
||||
Lite("lite",
|
||||
cl::desc("skip processing of cold functions"),
|
||||
cl::init(false),
|
||||
cl::ZeroOrMore,
|
||||
cl::cat(BoltCategory));
|
||||
|
||||
static cl::opt<unsigned>
|
||||
MaxFunctions("max-funcs",
|
||||
cl::desc("maximum number of functions to process"),
|
||||
|
@ -456,7 +463,7 @@ RewriteInstance::~RewriteInstance() {}
|
|||
|
||||
bool RewriteInstance::shouldDisassemble(const BinaryFunction &BF) const {
|
||||
// If we have to relocate the code we have to disassemble all functions.
|
||||
if (!BF.getBinaryContext().HasRelocations && BF.isIgnored()) {
|
||||
if (!BC->HasRelocations && BF.isIgnored()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -741,14 +748,11 @@ void RewriteInstance::run() {
|
|||
discoverFileObjects();
|
||||
|
||||
std::thread PreProcessProfileThread([&]() {
|
||||
if (!DA.started())
|
||||
return;
|
||||
|
||||
outs() << "BOLT-INFO: spawning thread to pre-process profile\n";
|
||||
preprocessProfileData();
|
||||
});
|
||||
|
||||
if (opts::NoThreads)
|
||||
if (opts::NoThreads || opts::Lite)
|
||||
PreProcessProfileThread.join();
|
||||
|
||||
selectFunctionsToProcess();
|
||||
|
@ -1683,6 +1687,14 @@ void RewriteInstance::adjustCommandLineOptions() {
|
|||
if (!opts::AlignText.getNumOccurrences()) {
|
||||
opts::AlignText = BC->PageAlign;
|
||||
}
|
||||
|
||||
if (!BC->HasRelocations && opts::Lite.getNumOccurrences() == 0) {
|
||||
opts::Lite = true;
|
||||
} else if (BC->HasRelocations && opts::Lite) {
|
||||
errs() << "BOLT-WARNING: lite mode currently does not work with "
|
||||
"relocations\n";
|
||||
opts::Lite = false;
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
@ -2271,6 +2283,13 @@ void RewriteInstance::selectFunctionsToProcess() {
|
|||
}
|
||||
}
|
||||
|
||||
if (opts::Lite) {
|
||||
if (!BC->DR.getAllFuncsData().empty() &&
|
||||
!BC->DR.mayHaveProfileData(Function)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@ -2303,61 +2322,62 @@ void RewriteInstance::readDebugInfo() {
|
|||
void RewriteInstance::preprocessProfileData() {
|
||||
NamedRegionTimer T("preprocessprofile", "pre-process profile data",
|
||||
TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
|
||||
if (BAT->enabledFor(InputFile)) {
|
||||
outs() << "BOLT-INFO: profile collection done on a binary already "
|
||||
"processed by BOLT\n";
|
||||
DA.setBAT(&*BAT);
|
||||
if (DA.started()) {
|
||||
if (BAT->enabledFor(InputFile)) {
|
||||
outs() << "BOLT-INFO: profile collection done on a binary already "
|
||||
"processed by BOLT\n";
|
||||
DA.setBAT(&*BAT);
|
||||
}
|
||||
DA.parseProfile(*BC.get());
|
||||
return;
|
||||
}
|
||||
|
||||
// Preliminary match profile data to functions.
|
||||
if (!BC->DR.getAllFuncsData().empty()) {
|
||||
if (BC->DR.collectedInBoltedBinary()) {
|
||||
outs() << "BOLT-INFO: profile collection done on a binary already "
|
||||
"processed by BOLT\n";
|
||||
}
|
||||
for (auto &BFI : BC->getBinaryFunctions()) {
|
||||
auto &Function = BFI.second;
|
||||
if (auto *MemData = BC->DR.getFuncMemData(Function.getNames())) {
|
||||
Function.MemData = MemData;
|
||||
MemData->Used = true;
|
||||
}
|
||||
if (auto *FuncData = BC->DR.getFuncBranchData(Function.getNames())) {
|
||||
Function.BranchData = FuncData;
|
||||
Function.ExecutionCount = FuncData->ExecutionCount;
|
||||
FuncData->Used = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
DA.parseProfile(*BC.get());
|
||||
}
|
||||
|
||||
void RewriteInstance::processProfileData() {
|
||||
NamedRegionTimer T("processprofile", "process profile data", TimerGroupName,
|
||||
TimerGroupDesc, opts::TimeRewrite);
|
||||
auto &BinaryFunctions = BC->getBinaryFunctions();
|
||||
if (!opts::BoltProfile.empty()) {
|
||||
ProfileReader PR;
|
||||
auto EC = PR.readProfile(opts::BoltProfile, BC->getBinaryFunctions());
|
||||
check_error(EC, "cannot read profile");
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (DA.started()) {
|
||||
DA.processProfile(*BC.get());
|
||||
|
||||
for (auto &BFI : BinaryFunctions) {
|
||||
for (auto &BFI : BC->getBinaryFunctions()) {
|
||||
auto &Function = BFI.second;
|
||||
Function.convertBranchData();
|
||||
}
|
||||
|
||||
if (opts::AggregateOnly) {
|
||||
if (std::error_code EC = DA.writeAggregatedFile()) {
|
||||
check_error(EC, "cannot create output data file");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (!opts::BoltProfile.empty()) {
|
||||
ProfileReader PR;
|
||||
auto EC = PR.readProfile(opts::BoltProfile, BinaryFunctions);
|
||||
check_error(EC, "cannot read profile");
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Preliminary match profile data to functions.
|
||||
if (!BC->DR.getAllFuncsData().empty()) {
|
||||
if (BC->DR.collectedInBoltedBinary()) {
|
||||
outs() << "BOLT-INFO: profile collection done on a binary already "
|
||||
"processed by BOLT\n";
|
||||
}
|
||||
for (auto &BFI : BinaryFunctions) {
|
||||
auto &Function = BFI.second;
|
||||
if (auto *MemData = BC->DR.getFuncMemData(Function.getNames())) {
|
||||
Function.MemData = MemData;
|
||||
MemData->Used = true;
|
||||
}
|
||||
if (auto *FuncData = BC->DR.getFuncBranchData(Function.getNames())) {
|
||||
Function.BranchData = FuncData;
|
||||
Function.ExecutionCount = FuncData->ExecutionCount;
|
||||
FuncData->Used = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &BFI : BinaryFunctions) {
|
||||
for (auto &BFI : BC->getBinaryFunctions()) {
|
||||
auto &Function = BFI.second;
|
||||
Function.readProfile();
|
||||
}
|
||||
|
@ -2375,17 +2395,11 @@ void RewriteInstance::disassembleFunctions() {
|
|||
for (auto &BFI : BC->getBinaryFunctions()) {
|
||||
BinaryFunction &Function = BFI.second;
|
||||
|
||||
if (!shouldDisassemble(Function)) {
|
||||
Function.setSimple(false);
|
||||
continue;
|
||||
}
|
||||
|
||||
auto FunctionData = Function.getData();
|
||||
if (!FunctionData) {
|
||||
// When could it happen?
|
||||
errs() << "BOLT-ERROR: corresponding section is non-executable or "
|
||||
<< "empty for function " << Function << '\n';
|
||||
continue;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Treat zero-sized functions as non-simple ones.
|
||||
|
@ -2399,6 +2413,14 @@ void RewriteInstance::disassembleFunctions() {
|
|||
reinterpret_cast<const uint8_t*>(InputFile->getData().data());
|
||||
Function.setFileOffset(FunctionData->begin() - FileBegin);
|
||||
|
||||
if (!shouldDisassemble(Function)) {
|
||||
NamedRegionTimer T("scan", "scan functions", "buildfuncs",
|
||||
"Scan Binary Functions", opts::TimeBuild);
|
||||
Function.scanExternalRefs();
|
||||
Function.setSimple(false);
|
||||
continue;
|
||||
}
|
||||
|
||||
Function.disassemble();
|
||||
|
||||
if (!Function.isSimple() && BC->HasRelocations) {
|
||||
|
|
Loading…
Reference in New Issue