[PERF2BOLT] Improve file matching

Summary:
If the input binary for perf2bolt has a build-id and perf data has
recorded build-ids, then try to match them. If a build-id matches,
adjust the file name to the one recorded in perf data to cover cases
where the binary was renamed after data collection. If there's no
matching build-id, report an error and exit, unless -ignore-build-id
is specified.

While scanning task events, truncate the name to 15 characters prior to
matching, since that's how names are reported by perf.

(cherry picked from FBD8034436)
This commit is contained in:
Maksim Panchenko 2018-05-16 13:31:13 -07:00
parent 13968f7fa9
commit 6302e18f94
4 changed files with 78 additions and 75 deletions

View File

@ -35,16 +35,22 @@ namespace opts {
extern cl::OptionCategory AggregatorCategory; extern cl::OptionCategory AggregatorCategory;
static llvm::cl::opt<bool> static cl::opt<bool>
TimeAggregator("time-aggr", BasicAggregation("nl",
cl::desc("time BOLT aggregator"), cl::desc("aggregate basic samples (without LBR info)"),
cl::init(false), cl::init(false),
cl::ZeroOrMore, cl::ZeroOrMore,
cl::cat(AggregatorCategory)); cl::cat(AggregatorCategory));
static llvm::cl::opt<bool> static cl::opt<bool>
BasicAggregation("nl", IgnoreBuildID("ignore-build-id",
cl::desc("aggregate basic samples (without LBR info)"), cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
cl::init(false),
cl::cat(AggregatorCategory));
static cl::opt<bool>
TimeAggregator("time-aggr",
cl::desc("time BOLT aggregator"),
cl::init(false), cl::init(false),
cl::ZeroOrMore, cl::ZeroOrMore,
cl::cat(AggregatorCategory)); cl::cat(AggregatorCategory));
@ -219,7 +225,7 @@ bool DataAggregator::launchPerfTasksNoWait() {
return true; return true;
} }
Optional<std::string> DataAggregator::getPerfBuildID() { void DataAggregator::processFileBuildID(StringRef FileBuildID) {
SmallVector<const char *, 4> Argv; SmallVector<const char *, 4> Argv;
SmallVector<char, 256> OutputPath; SmallVector<char, 256> OutputPath;
SmallVector<char, 256> ErrPath; SmallVector<char, 256> ErrPath;
@ -265,7 +271,7 @@ Optional<std::string> DataAggregator::getPerfBuildID() {
errs() << ErrBuf; errs() << ErrBuf;
deleteTempFile(ErrPath.data()); deleteTempFile(ErrPath.data());
deleteTempFile(OutputPath.data()); deleteTempFile(OutputPath.data());
return NoneType(); return;
} }
ErrorOr<std::unique_ptr<MemoryBuffer>> MB = ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
@ -275,26 +281,44 @@ Optional<std::string> DataAggregator::getPerfBuildID() {
<< EC.message() << "\n"; << EC.message() << "\n";
deleteTempFile(ErrPath.data()); deleteTempFile(ErrPath.data());
deleteTempFile(OutputPath.data()); deleteTempFile(OutputPath.data());
return NoneType(); return;
} }
FileBuf.reset(MB->release()); FileBuf.reset(MB->release());
ParsingBuf = FileBuf->getBuffer(); ParsingBuf = FileBuf->getBuffer();
Col = 0; if (ParsingBuf.empty()) {
Line = 1; errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
auto ParseResult = parsePerfBuildID(); "data was recorded without it\n";
if (!ParseResult) {
outs() << "PERF2BOLT: Failed to parse build-id from perf output\n";
deleteTempFile(ErrPath.data()); deleteTempFile(ErrPath.data());
deleteTempFile(OutputPath.data()); deleteTempFile(OutputPath.data());
return NoneType(); return;
} }
outs() << "PERF2BOLT: Perf.data build-id is: " << *ParseResult << "\n"; Col = 0;
Line = 1;
auto FileName = getFileNameForBuildID(FileBuildID);
if (!FileName) {
errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
"This indicates the input binary supplied for data aggregation "
"is not the same recorded by perf when collecting profiling "
"data. Use -ignore-build-id option to override.\n";
if (!opts::IgnoreBuildID) {
deleteTempFile(ErrPath.data());
deleteTempFile(OutputPath.data());
abort();
exit(1);
}
} else if (*FileName != BinaryName) {
errs() << "PERF2BOLT-WARNING: build-id matched a different file name. "
"Using \"" << *FileName << "\" for profile parsing.\n";
BinaryName = *FileName;
} else {
outs() << "PERF2BOLT: matched build-id and file name\n";
}
deleteTempFile(ErrPath.data()); deleteTempFile(ErrPath.data());
deleteTempFile(OutputPath.data()); deleteTempFile(OutputPath.data());
return std::string(ParseResult->data(), ParseResult->size()); return;
} }
bool DataAggregator::checkPerfDataMagic(StringRef FileName) { bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
@ -967,7 +991,7 @@ ErrorOr<int64_t> DataAggregator::parseTaskPID() {
auto CommNameStr = parseString(FieldSeparator, true); auto CommNameStr = parseString(FieldSeparator, true);
if (std::error_code EC = CommNameStr.getError()) if (std::error_code EC = CommNameStr.getError())
return EC; return EC;
if (CommNameStr.get() != BinaryName) { if (CommNameStr.get() != BinaryName.substr(0, 15)) {
consumeRestOfLine(); consumeRestOfLine();
return -1; return -1;
} }
@ -1013,12 +1037,19 @@ std::error_code DataAggregator::parseTasks() {
PIDs.insert(PID); PIDs.insert(PID);
} }
if (!PIDs.empty()) if (!PIDs.empty()) {
outs() << "PERF2BOLT: Input binary is associated with " << PIDs.size() outs() << "PERF2BOLT: Input binary is associated with " << PIDs.size()
<< " PID(s)\n"; << " PID(s)\n";
else } else {
outs() << "PERF2BOLT: Could not bind input binary to a PID - will parse " if (errs().has_colors())
"all samples in perf data.\n"; errs().changeColor(raw_ostream::YELLOW);
errs() << "PERF2BOLT-WARNING: Could not bind input binary to a PID - will "
"parse all samples in perf data. This could result in corrupted "
"samples for the input binary if system-wide profile collection "
"was used.\n";
if (errs().has_colors())
errs().resetColor();
}
return std::error_code(); return std::error_code();
} }
@ -1039,16 +1070,15 @@ DataAggregator::parseNameBuildIDPair() {
return std::make_pair(NameStr.get(), BuildIDStr.get()); return std::make_pair(NameStr.get(), BuildIDStr.get());
} }
Optional<StringRef> DataAggregator::parsePerfBuildID() { Optional<StringRef>
DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
while (hasData()) { while (hasData()) {
auto IDPair = parseNameBuildIDPair(); auto IDPair = parseNameBuildIDPair();
if (!IDPair) if (!IDPair)
return NoneType(); return NoneType();
if (sys::path::filename(IDPair->first) != BinaryName) if (IDPair->second == FileBuildID)
continue; return sys::path::filename(IDPair->first);
return IDPair->second;
} }
return NoneType(); return NoneType();
} }

View File

@ -85,7 +85,7 @@ class DataAggregator : public DataReader {
StringRef OutputFDataName; StringRef OutputFDataName;
/// Our sampled binary name to look for in perf.data /// Our sampled binary name to look for in perf.data
StringRef BinaryName; std::string BinaryName;
DenseSet<int64_t> PIDs; DenseSet<int64_t> PIDs;
@ -189,9 +189,9 @@ class DataAggregator : public DataReader {
/// Parse a single pair of binary full path and associated build-id /// Parse a single pair of binary full path and associated build-id
Optional<std::pair<StringRef, StringRef>> parseNameBuildIDPair(); Optional<std::pair<StringRef, StringRef>> parseNameBuildIDPair();
/// Parse the output generated by perf buildid-list to extract the build-id /// Parse the output generated by "perf buildid-list" to extract build-ids
/// of the binary used when collecting profiling /// and return a file name matching a given \p FileBuildID.
Optional<StringRef> parsePerfBuildID(); Optional<StringRef> getFileNameForBuildID(StringRef FileBuildID);
public: public:
DataAggregator(raw_ostream &Diag, StringRef BinaryName) DataAggregator(raw_ostream &Diag, StringRef BinaryName)
@ -221,11 +221,13 @@ public:
/// Check whether \p FileName is a perf.data file /// Check whether \p FileName is a perf.data file
static bool checkPerfDataMagic(StringRef FileName); static bool checkPerfDataMagic(StringRef FileName);
/// Launch a subprocess with perf buildid-list to extract the build-id of the /// If we have a build-id available for the input file, use it to assist
/// binary used when collecting profiling. Different than launchPerf*, this /// matching profile to a binary.
/// one spawns the subprocess and blocks. Then it parses the result and ///
/// returns the build-id. /// If the binary name changed after profile collection, use build-id
Optional<std::string> getPerfBuildID(); /// to get the proper name in perf data when build-ids are available.
/// If \p FileBuildID has no match, then issue an error and exit.
void processFileBuildID(StringRef FileBuildID);
/// Debugging dump methods /// Debugging dump methods
void dump() const; void dump() const;

View File

@ -370,12 +370,6 @@ DiffOnly("diff-only",
cl::Hidden, cl::Hidden,
cl::cat(BoltDiffCategory)); cl::cat(BoltDiffCategory));
static cl::opt<bool>
IgnoreBuildID("ignore-build-id",
cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
cl::init(false),
cl::cat(AggregatorCategory));
static cl::opt<bool> static cl::opt<bool>
TimeRewrite("time-rewrite", TimeRewrite("time-rewrite",
cl::desc("print time spent in rewriting passes"), cl::desc("print time spent in rewriting passes"),
@ -950,38 +944,12 @@ RewriteInstance::getBuildID() const {
OS << Twine::utohexstr(*CharIter); OS << Twine::utohexstr(*CharIter);
++CharIter; ++CharIter;
} }
outs() << "BOLT-INFO: Binary build-id is: " << OS.str() << "\n"; outs() << "BOLT-INFO: binary build-id is: " << OS.str() << "\n";
return OS.str(); return OS.str();
} }
return NoneType(); return NoneType();
} }
void RewriteInstance::checkBuildID() {
auto FileBuildID = getBuildID();
if (!FileBuildID) {
outs() << "BOLT-WARNING: Build ID will not be checked because we could not "
"read one from input binary\n";
return;
}
auto PerfBuildID = DA.getPerfBuildID();
if (!PerfBuildID) {
outs() << "BOLT-WARNING: Build ID will not be checked because we could not "
"read one from perf.data\n";
return;
}
if (*FileBuildID == *PerfBuildID)
return;
outs() << "BOLT-ERROR: Build ID mismatch! This indicates the input binary "
"supplied for data aggregation is not the same recorded by perf "
"when collecting profiling data.\n";
if (!opts::IgnoreBuildID) {
DA.abort();
exit(1);
}
}
void RewriteInstance::run() { void RewriteInstance::run() {
if (!BC) { if (!BC) {
errs() << "BOLT-ERROR: failed to create a binary context\n"; errs() << "BOLT-ERROR: failed to create a binary context\n";
@ -1015,8 +983,15 @@ void RewriteInstance::run() {
(llvm::Triple::ArchType)InputFile->getArch()) (llvm::Triple::ArchType)InputFile->getArch())
<< "\n"; << "\n";
if (DA.started()) if (DA.started()) {
checkBuildID(); if (auto FileBuildID = getBuildID()) {
DA.processFileBuildID(*FileBuildID);
} else {
errs() << "BOLT-WARNING: build-id will not be checked because we could "
"not read one from input binary\n";
}
}
unsigned PassNumber = 1; unsigned PassNumber = 1;
executeRewritePass({}); executeRewritePass({});
if (opts::AggregateOnly || opts::DiffOnly) if (opts::AggregateOnly || opts::DiffOnly)

View File

@ -121,10 +121,6 @@ public:
/// Run all the necessary steps to read, optimize and rewrite the binary. /// Run all the necessary steps to read, optimize and rewrite the binary.
void run(); void run();
/// Check that binary build ID matches the one used in perf.data to collect
/// profile
void checkBuildID();
/// Diff this instance against another one. Non-const since we may run passes /// Diff this instance against another one. Non-const since we may run passes
/// to fold identical functions. /// to fold identical functions.
void compare(RewriteInstance &RI2); void compare(RewriteInstance &RI2);