[perf2bolt] Enforce strict mode for perf2bolt

Summary:
Strict relocation mode gives better function coverage. However, a profile
that was converted in non-strict mode will not match the functions that are
covered only in strict mode. Hence, when the binary has relocations and the
strict-mode option was not set explicitly, we enforce strict relocation mode
for profile conversion, so that the resulting profile can be used in either
mode.

This change also pre-processes the profile on a separate thread, in parallel
with function disassembly, unless `--no-threads` is specified. This masks the
runtime overhead of function disassembly on multi-core machines.

(cherry picked from FBD16587855)
This commit is contained in:
Maksim Panchenko 2019-06-11 13:24:10 -07:00
parent 1bce256e67
commit 79ff4ec1cb
1 changed files with 36 additions and 10 deletions

View File

@ -70,6 +70,7 @@
#include <fstream>
#include <stack>
#include <system_error>
#include <thread>
#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt"
@ -784,8 +785,13 @@ bool RewriteInstance::shouldDisassemble(const BinaryFunction &BF) const {
return false;
}
// In strict mode we have to account for all functions.
if (!opts::StrictMode && opts::AggregateOnly && !BF.hasProfileAvailable())
// If we are running in profile conversion mode and there is no profile
// available for the function, we can skip the disassembly.
// However, in strict relocation mode we need to account for all
// functions. Also, when multi-threading is enabled, the profile may not be
// available yet, and we conservatively disassemble the function.
if (opts::AggregateOnly && opts::NoThreads && !opts::StrictMode &&
!BF.hasProfileAvailable())
return false;
return true;
@ -1053,18 +1059,31 @@ void RewriteInstance::run() {
readSpecialSections();
adjustCommandLineOptions();
discoverFileObjects();
std::thread PreProcessProfileThread([&]() {
outs() << "BOLT-INFO: spawning thread to pre-process profile\n";
preprocessProfileData();
if (opts::AggregateOnly && DA.usesBAT()) {
// Skip disassembling if we have a translation table and we running an
// aggregation job.
processProfileData();
return;
}
});
if (opts::NoThreads)
PreProcessProfileThread.join();
readDebugInfo();
// Skip disassembling if we have a translation table and we are running an
// aggregation job.
if (!opts::AggregateOnly || !DA.usesBAT()) {
disassembleFunctions();
}
if (PreProcessProfileThread.joinable())
PreProcessProfileThread.join();
processProfileData();
if (opts::AggregateOnly)
return;
postProcessFunctions();
for (uint64_t Address : NonSimpleFunctions) {
auto *BF = BC->getBinaryFunctionAtAddress(Address);
@ -1980,6 +1999,13 @@ void RewriteInstance::adjustCommandLineOptions() {
opts::StrictMode = false;
}
if (BC->HasRelocations && opts::AggregateOnly &&
!opts::StrictMode.getNumOccurrences()) {
outs() << "BOLT-INFO: enabling strict relocation mode for aggregtion "
"purposes\n";
opts::StrictMode = true;
}
if (BC->isX86() && BC->HasRelocations &&
opts::AlignMacroOpFusion == MFT_HOT &&
!DA.started() && BC->DR.getAllFuncsData().empty() &&