2021-12-22 02:21:41 +08:00
|
|
|
//===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
|
2017-09-02 09:13:51 +08:00
|
|
|
//
|
2021-03-16 09:04:18 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2017-09-02 09:13:51 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This family of functions reads profile data written by perf record,
|
|
|
|
// aggregates it and then writes it back to an output file.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2021-10-09 02:47:10 +08:00
|
|
|
#include "bolt/Profile/DataAggregator.h"
|
|
|
|
#include "bolt/Core/BinaryContext.h"
|
|
|
|
#include "bolt/Core/BinaryFunction.h"
|
|
|
|
#include "bolt/Profile/BoltAddressTranslation.h"
|
|
|
|
#include "bolt/Profile/Heatmap.h"
|
|
|
|
#include "bolt/Utils/CommandLineOpts.h"
|
|
|
|
#include "bolt/Utils/Utils.h"
|
2021-11-12 10:14:53 +08:00
|
|
|
#include "llvm/ADT/ScopeExit.h"
|
2020-12-02 08:29:39 +08:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
2017-09-02 09:13:51 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
2022-02-14 23:27:04 +08:00
|
|
|
#include "llvm/Support/Errc.h"
|
2017-09-02 09:13:51 +08:00
|
|
|
#include "llvm/Support/FileSystem.h"
|
|
|
|
#include "llvm/Support/Process.h"
|
|
|
|
#include "llvm/Support/Program.h"
|
2017-10-17 04:09:43 +08:00
|
|
|
#include "llvm/Support/Regex.h"
|
2017-09-02 09:13:51 +08:00
|
|
|
#include "llvm/Support/Timer.h"
|
2021-10-09 02:47:10 +08:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2018-07-14 06:26:41 +08:00
|
|
|
#include <map>
|
2018-10-03 08:16:26 +08:00
|
|
|
#include <unordered_map>
|
2021-06-24 06:10:47 +08:00
|
|
|
#include <utility>
|
2017-09-02 09:13:51 +08:00
|
|
|
|
|
|
|
#define DEBUG_TYPE "aggregator"
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
using namespace bolt;
|
|
|
|
|
|
|
|
namespace opts {
|
|
|
|
|
2018-05-17 04:31:13 +08:00
|
|
|
static cl::opt<bool>
|
|
|
|
BasicAggregation("nl",
|
|
|
|
cl::desc("aggregate basic samples (without LBR info)"),
|
2017-09-02 09:13:51 +08:00
|
|
|
cl::init(false),
|
|
|
|
cl::ZeroOrMore,
|
|
|
|
cl::cat(AggregatorCategory));
|
|
|
|
|
[BOLT] Improve ICP activation policy and hot jt processing
Summary:
Previously, ICP worked with a budget of N targets to convert to
direct calls. As long as the frequency of up to N of the hottest targets
surpassed a given fraction (threshold) of the total frequency, say, 90%,
then the optimization would convert a number of targets (up to N) to
direct calls. Otherwise, it would completely abort processing this call
site. The intent was to convert a given fraction of the indirect call
site frequency to use direct calls instead, but this ends up being a
"all or nothing" strategy.
In this patch we change this to operate with the same strategy seem in
LLVM's ICP, with two thresholds. The idea is that the hottest target of
an indirect call site will be compared against these two thresholds: one
checks its frequency relative to the total frequency of the original
indirect call site, and the other checks its frequency relative to the
remaining, unconverted targets (excluding the hottest targets that were
already converted to direct calls). The remaining threshold is typically
set higher than the total threshold. This allows us more control over
ICP.
I expose two pairs of knobs, one for jump tables and another for
indirect calls.
To improve the promotion of hot jump table indices when we have memory
profile, I also fix a bug that could cause us to promote extra indices
besides the hottest ones as seen in the memory profile. When we have the
memory profile, I reapply the dual threshold checks to the memory
profile which specifies exactly which indices are hot. I then update N,
the number of targets to be promoted, based on this new information, and
update frequency information.
To allow us to work with smaller profiles, I also created an option in
perf2bolt to filter out memory samples outside the statically allocated
area of the binary (heap/stack). This option is on by default.
(cherry picked from FBD15187832)
2019-05-03 03:28:34 +08:00
|
|
|
static cl::opt<bool>
|
|
|
|
FilterMemProfile("filter-mem-profile",
|
2020-05-08 14:00:29 +08:00
|
|
|
cl::desc("if processing a memory profile, filter out stack or heap accesses "
|
|
|
|
"that won't be useful for BOLT to reduce profile file size"),
|
[BOLT] Improve ICP activation policy and hot jt processing
Summary:
Previously, ICP worked with a budget of N targets to convert to
direct calls. As long as the frequency of up to N of the hottest targets
surpassed a given fraction (threshold) of the total frequency, say, 90%,
then the optimization would convert a number of targets (up to N) to
direct calls. Otherwise, it would completely abort processing this call
site. The intent was to convert a given fraction of the indirect call
site frequency to use direct calls instead, but this ends up being a
"all or nothing" strategy.
In this patch we change this to operate with the same strategy seem in
LLVM's ICP, with two thresholds. The idea is that the hottest target of
an indirect call site will be compared against these two thresholds: one
checks its frequency relative to the total frequency of the original
indirect call site, and the other checks its frequency relative to the
remaining, unconverted targets (excluding the hottest targets that were
already converted to direct calls). The remaining threshold is typically
set higher than the total threshold. This allows us more control over
ICP.
I expose two pairs of knobs, one for jump tables and another for
indirect calls.
To improve the promotion of hot jump table indices when we have memory
profile, I also fix a bug that could cause us to promote extra indices
besides the hottest ones as seen in the memory profile. When we have the
memory profile, I reapply the dual threshold checks to the memory
profile which specifies exactly which indices are hot. I then update N,
the number of targets to be promoted, based on this new information, and
update frequency information.
To allow us to work with smaller profiles, I also created an option in
perf2bolt to filter out memory samples outside the statically allocated
area of the binary (heap/stack). This option is on by default.
(cherry picked from FBD15187832)
2019-05-03 03:28:34 +08:00
|
|
|
cl::init(true),
|
|
|
|
cl::cat(AggregatorCategory));
|
|
|
|
|
2020-05-08 14:00:29 +08:00
|
|
|
static cl::opt<unsigned long long>
|
|
|
|
FilterPID("pid",
|
|
|
|
cl::desc("only use samples from process with specified PID"),
|
|
|
|
cl::init(0),
|
|
|
|
cl::Optional,
|
|
|
|
cl::cat(AggregatorCategory));
|
|
|
|
|
|
|
|
static cl::opt<bool>
|
|
|
|
IgnoreBuildID("ignore-build-id",
|
|
|
|
cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
|
|
|
|
cl::init(false),
|
|
|
|
cl::cat(AggregatorCategory));
|
|
|
|
|
|
|
|
static cl::opt<bool>
|
|
|
|
IgnoreInterruptLBR("ignore-interrupt-lbr",
|
|
|
|
cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
|
|
|
|
cl::init(true),
|
|
|
|
cl::ZeroOrMore,
|
|
|
|
cl::cat(AggregatorCategory));
|
|
|
|
|
|
|
|
static cl::opt<unsigned long long>
|
|
|
|
MaxSamples("max-samples",
|
|
|
|
cl::init(-1ULL),
|
|
|
|
cl::desc("maximum number of samples to read from LBR profile"),
|
|
|
|
cl::Optional,
|
|
|
|
cl::Hidden,
|
|
|
|
cl::cat(AggregatorCategory));
|
|
|
|
|
2018-07-18 09:31:46 +08:00
|
|
|
static cl::opt<bool>
|
|
|
|
ReadPreAggregated("pa",
|
|
|
|
cl::desc("skip perf and read data from a pre-aggregated file format"),
|
|
|
|
cl::init(false),
|
|
|
|
cl::ZeroOrMore,
|
|
|
|
cl::cat(AggregatorCategory));
|
|
|
|
|
2018-05-17 04:31:13 +08:00
|
|
|
static cl::opt<bool>
|
2019-02-06 07:28:19 +08:00
|
|
|
TimeAggregator("time-aggr",
|
|
|
|
cl::desc("time BOLT aggregator"),
|
2018-05-17 04:31:13 +08:00
|
|
|
cl::init(false),
|
2019-02-06 07:28:19 +08:00
|
|
|
cl::ZeroOrMore,
|
2018-05-17 04:31:13 +08:00
|
|
|
cl::cat(AggregatorCategory));
|
|
|
|
|
2019-06-07 10:38:06 +08:00
|
|
|
static cl::opt<bool>
|
|
|
|
UseEventPC("use-event-pc",
|
|
|
|
cl::desc("use event PC in combination with LBR sampling"),
|
|
|
|
cl::init(false),
|
|
|
|
cl::ZeroOrMore,
|
|
|
|
cl::cat(AggregatorCategory));
|
|
|
|
|
2018-05-17 04:31:13 +08:00
|
|
|
static cl::opt<bool>
|
2019-02-06 07:28:19 +08:00
|
|
|
WriteAutoFDOData("autofdo",
|
|
|
|
cl::desc("generate autofdo textual data instead of bolt data"),
|
2018-04-14 02:18:46 +08:00
|
|
|
cl::init(false),
|
|
|
|
cl::ZeroOrMore,
|
|
|
|
cl::cat(AggregatorCategory));
|
|
|
|
|
2021-12-15 08:52:51 +08:00
|
|
|
} // namespace opts
|
2017-09-02 09:13:51 +08:00
|
|
|
|
|
|
|
namespace {

// Identifier and human-readable description of the timer group used by the
// aggregator.
constexpr char TimerGroupName[] = "aggregator";
constexpr char TimerGroupDesc[] = "Aggregator";

} // namespace
|
|
|
|
|
2020-06-16 07:06:07 +08:00
|
|
|
// Out-of-line definition of the constexpr static data member declared in the
// class; before C++17 such a definition is required when the member is
// odr-used.
constexpr uint64_t DataAggregator::KernelBaseAddr;
|
|
|
|
|
2021-12-15 08:52:51 +08:00
|
|
|
/// Clean up the temporary files owned by this aggregator when it is destroyed.
DataAggregator::~DataAggregator() {
  deleteTempFiles();
}
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've been handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
|
|
|
|
namespace {
|
|
|
|
void deleteTempFile(const std::string &FileName) {
|
2021-12-29 10:29:54 +08:00
|
|
|
if (std::error_code Errc = sys::fs::remove(FileName.c_str()))
|
2021-12-15 08:52:51 +08:00
|
|
|
errs() << "PERF2BOLT: failed to delete temporary file " << FileName
|
|
|
|
<< " with error " << Errc.message() << "\n";
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void DataAggregator::deleteTempFiles() {
|
2021-12-29 10:29:54 +08:00
|
|
|
for (std::string &FileName : TempFiles)
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
deleteTempFile(FileName);
|
|
|
|
TempFiles.clear();
|
|
|
|
}
|
|
|
|
|
2017-09-02 09:13:51 +08:00
|
|
|
void DataAggregator::findPerfExecutable() {
|
2021-04-08 15:19:26 +08:00
|
|
|
Optional<std::string> PerfExecutable =
|
|
|
|
sys::Process::FindInEnvPath("PATH", "perf");
|
2017-09-02 09:13:51 +08:00
|
|
|
if (!PerfExecutable) {
|
|
|
|
outs() << "PERF2BOLT: No perf executable found!\n";
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
PerfPath = *PerfExecutable;
|
|
|
|
}
|
|
|
|
|
2020-05-08 14:00:29 +08:00
|
|
|
void DataAggregator::start() {
|
2021-12-15 08:52:51 +08:00
|
|
|
outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
|
2018-07-18 09:31:46 +08:00
|
|
|
|
|
|
|
// Don't launch perf for pre-aggregated files
|
|
|
|
if (opts::ReadPreAggregated)
|
|
|
|
return;
|
|
|
|
|
2017-09-02 09:13:51 +08:00
|
|
|
findPerfExecutable();
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
|
2021-12-29 10:29:54 +08:00
|
|
|
if (opts::BasicAggregation)
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
launchPerfProcess("events without LBR",
|
2019-01-16 15:43:40 +08:00
|
|
|
MainEventsPPI,
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
"script -F pid,event,ip",
|
|
|
|
/*Wait = */false);
|
2021-12-29 10:29:54 +08:00
|
|
|
else
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
launchPerfProcess("branch events",
|
2019-01-16 15:43:40 +08:00
|
|
|
MainEventsPPI,
|
2019-01-23 09:21:45 +08:00
|
|
|
"script -F pid,ip,brstack",
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
/*Wait = */false);
|
|
|
|
|
|
|
|
// Note: we launch script for mem events regardless of the option, as the
|
|
|
|
// command fails fairly fast if mem events were not collected.
|
|
|
|
launchPerfProcess("mem events",
|
|
|
|
MemEventsPPI,
|
|
|
|
"script -F pid,event,addr,ip",
|
|
|
|
/*Wait = */false);
|
|
|
|
|
|
|
|
launchPerfProcess("process events",
|
|
|
|
MMapEventsPPI,
|
|
|
|
"script --show-mmap-events",
|
|
|
|
/*Wait = */false);
|
|
|
|
|
|
|
|
launchPerfProcess("task events",
|
|
|
|
TaskEventsPPI,
|
|
|
|
"script --show-task-events",
|
|
|
|
/*Wait = */false);
|
2017-09-02 09:13:51 +08:00
|
|
|
}
|
|
|
|
|
2017-10-07 05:42:46 +08:00
|
|
|
void DataAggregator::abort() {
|
2018-07-18 09:31:46 +08:00
|
|
|
if (opts::ReadPreAggregated)
|
|
|
|
return;
|
|
|
|
|
2017-10-07 05:42:46 +08:00
|
|
|
std::string Error;
|
|
|
|
|
|
|
|
// Kill subprocesses in case they are not finished
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
sys::Wait(TaskEventsPPI.PI, 1, false, &Error);
|
|
|
|
sys::Wait(MMapEventsPPI.PI, 1, false, &Error);
|
2019-01-16 15:43:40 +08:00
|
|
|
sys::Wait(MainEventsPPI.PI, 1, false, &Error);
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
sys::Wait(MemEventsPPI.PI, 1, false, &Error);
|
2017-10-07 05:42:46 +08:00
|
|
|
|
|
|
|
deleteTempFiles();
|
|
|
|
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
exit(1);
|
2017-10-17 04:09:43 +08:00
|
|
|
}
|
|
|
|
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've been handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI,
|
|
|
|
const char *ArgsString, bool Wait) {
|
2020-12-02 08:29:39 +08:00
|
|
|
SmallVector<StringRef, 4> Argv;
|
2017-10-17 04:09:43 +08:00
|
|
|
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n';
|
2017-10-17 04:09:43 +08:00
|
|
|
Argv.push_back(PerfPath.data());
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
char *WritableArgsString = strdup(ArgsString);
|
|
|
|
char *Str = WritableArgsString;
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
do {
|
|
|
|
Argv.push_back(Str);
|
|
|
|
while (*Str && *Str != ' ')
|
|
|
|
++Str;
|
|
|
|
if (!*Str)
|
|
|
|
break;
|
|
|
|
*Str++ = 0;
|
|
|
|
} while (true);
|
2017-09-02 09:13:51 +08:00
|
|
|
|
2019-05-01 08:08:22 +08:00
|
|
|
Argv.push_back("-f");
|
2017-09-02 09:13:51 +08:00
|
|
|
Argv.push_back("-i");
|
2020-05-08 14:00:29 +08:00
|
|
|
Argv.push_back(Filename.c_str());
|
2017-09-02 09:13:51 +08:00
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
if (std::error_code Errc =
|
|
|
|
sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) {
|
2021-12-15 08:52:51 +08:00
|
|
|
errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath
|
|
|
|
<< " with error " << Errc.message() << "\n";
|
2017-09-02 09:13:51 +08:00
|
|
|
exit(1);
|
|
|
|
}
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
TempFiles.push_back(PPI.StdoutPath.data());
|
2017-09-02 09:13:51 +08:00
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
if (std::error_code Errc =
|
|
|
|
sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) {
|
2021-12-15 08:52:51 +08:00
|
|
|
errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath
|
|
|
|
<< " with error " << Errc.message() << "\n";
|
2017-09-02 09:13:51 +08:00
|
|
|
exit(1);
|
|
|
|
}
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
TempFiles.push_back(PPI.StderrPath.data());
|
2017-09-02 09:13:51 +08:00
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-07 07:00:23 +08:00
|
|
|
Optional<StringRef> Redirects[] = {
|
2019-04-06 08:27:25 +08:00
|
|
|
llvm::None, // Stdin
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
StringRef(PPI.StdoutPath.data()), // Stdout
|
|
|
|
StringRef(PPI.StderrPath.data())}; // Stderr
|
2017-09-02 09:13:51 +08:00
|
|
|
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG({
|
|
|
|
dbgs() << "Launching perf: ";
|
|
|
|
for (StringRef Arg : Argv)
|
|
|
|
dbgs() << Arg << " ";
|
|
|
|
dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data()
|
|
|
|
<< "\n";
|
|
|
|
});
|
2017-09-02 09:13:51 +08:00
|
|
|
|
2021-12-29 10:29:54 +08:00
|
|
|
if (Wait)
|
2020-12-02 08:29:39 +08:00
|
|
|
PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv,
|
|
|
|
/*envp*/ llvm::None, Redirects);
|
2021-12-29 10:29:54 +08:00
|
|
|
else
|
2020-12-02 08:29:39 +08:00
|
|
|
PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ llvm::None,
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
Redirects);
|
2017-09-02 09:13:51 +08:00
|
|
|
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
free(WritableArgsString);
|
2017-09-02 09:13:51 +08:00
|
|
|
}
|
|
|
|
|
2018-05-17 04:31:13 +08:00
|
|
|
void DataAggregator::processFileBuildID(StringRef FileBuildID) {
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
PerfProcessInfo BuildIDProcessInfo;
|
|
|
|
launchPerfProcess("buildid list",
|
|
|
|
BuildIDProcessInfo,
|
|
|
|
"buildid-list",
|
|
|
|
/*Wait = */true);
|
2017-10-07 05:42:46 +08:00
|
|
|
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
if (BuildIDProcessInfo.PI.ReturnCode != 0) {
|
2017-10-07 05:42:46 +08:00
|
|
|
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
|
2021-12-15 08:52:51 +08:00
|
|
|
MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data());
|
2017-10-07 05:42:46 +08:00
|
|
|
StringRef ErrBuf = (*MB)->getBuffer();
|
|
|
|
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode
|
|
|
|
<< '\n';
|
2017-10-07 05:42:46 +08:00
|
|
|
errs() << ErrBuf;
|
2018-05-17 04:31:13 +08:00
|
|
|
return;
|
2017-10-07 05:42:46 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
|
2021-12-15 08:52:51 +08:00
|
|
|
MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data());
|
2017-10-07 05:42:46 +08:00
|
|
|
if (std::error_code EC = MB.getError()) {
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": "
|
2017-10-07 05:42:46 +08:00
|
|
|
<< EC.message() << "\n";
|
2018-05-17 04:31:13 +08:00
|
|
|
return;
|
2017-10-07 05:42:46 +08:00
|
|
|
}
|
|
|
|
|
2021-06-24 06:10:47 +08:00
|
|
|
FileBuf = std::move(*MB);
|
2017-10-07 05:42:46 +08:00
|
|
|
ParsingBuf = FileBuf->getBuffer();
|
2018-05-17 04:31:13 +08:00
|
|
|
if (ParsingBuf.empty()) {
|
|
|
|
errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
|
|
|
|
"data was recorded without it\n";
|
|
|
|
return;
|
2017-10-07 05:42:46 +08:00
|
|
|
}
|
|
|
|
|
2018-05-17 04:31:13 +08:00
|
|
|
Col = 0;
|
|
|
|
Line = 1;
|
2021-04-08 15:19:26 +08:00
|
|
|
Optional<StringRef> FileName = getFileNameForBuildID(FileBuildID);
|
2018-05-17 04:31:13 +08:00
|
|
|
if (!FileName) {
|
|
|
|
errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
|
|
|
|
"This indicates the input binary supplied for data aggregation "
|
|
|
|
"is not the same recorded by perf when collecting profiling "
|
2018-08-15 04:24:44 +08:00
|
|
|
"data, or there were no samples recorded for the binary. "
|
|
|
|
"Use -ignore-build-id option to override.\n";
|
2021-12-29 10:29:54 +08:00
|
|
|
if (!opts::IgnoreBuildID)
|
2018-05-17 04:31:13 +08:00
|
|
|
abort();
|
2020-05-08 14:00:29 +08:00
|
|
|
} else if (*FileName != llvm::sys::path::filename(BC->getFilename())) {
|
2018-07-14 06:26:41 +08:00
|
|
|
errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
|
2020-12-02 08:29:39 +08:00
|
|
|
BuildIDBinaryName = std::string(*FileName);
|
2018-05-17 04:31:13 +08:00
|
|
|
} else {
|
|
|
|
outs() << "PERF2BOLT: matched build-id and file name\n";
|
|
|
|
}
|
2017-10-07 05:42:46 +08:00
|
|
|
|
2018-05-17 04:31:13 +08:00
|
|
|
return;
|
2017-10-07 05:42:46 +08:00
|
|
|
}
|
|
|
|
|
2017-09-02 09:13:51 +08:00
|
|
|
/// Return true if \p FileName starts with the 7-byte magic "PERFILE".
///
/// When opts::ReadPreAggregated is set the file is not inspected and true is
/// returned unconditionally. Returns false if the file cannot be opened, if
/// fewer than 7 bytes can be read from offset 0, or if the bytes read do not
/// match the magic.
bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
  if (opts::ReadPreAggregated)
    return true;

  Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName);
  if (!FD) {
    // A failed Expected must have its error consumed before destruction,
    // otherwise builds with ABI-breaking checks enabled abort.
    consumeError(FD.takeError());
    return false;
  }

  char Buf[7] = {0, 0, 0, 0, 0, 0, 0};

  auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); });
  Expected<size_t> BytesRead = sys::fs::readNativeFileSlice(
      *FD, makeMutableArrayRef(Buf, sizeof(Buf)), 0);
  if (!BytesRead) {
    consumeError(BytesRead.takeError());
    return false;
  }
  if (*BytesRead != sizeof(Buf))
    return false;

  return strncmp(Buf, "PERFILE", sizeof(Buf)) == 0;
}
|
|
|
|
|
2019-01-16 15:43:40 +08:00
|
|
|
void DataAggregator::parsePreAggregated() {
|
2018-07-18 09:31:46 +08:00
|
|
|
std::string Error;
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
|
|
|
|
MemoryBuffer::getFileOrSTDIN(Filename);
|
2018-07-18 09:31:46 +08:00
|
|
|
if (std::error_code EC = MB.getError()) {
|
2020-05-08 14:00:29 +08:00
|
|
|
errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
|
2018-07-18 09:31:46 +08:00
|
|
|
<< EC.message() << "\n";
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2021-06-24 06:10:47 +08:00
|
|
|
FileBuf = std::move(*MB);
|
2018-07-18 09:31:46 +08:00
|
|
|
ParsingBuf = FileBuf->getBuffer();
|
|
|
|
Col = 0;
|
|
|
|
Line = 1;
|
2019-01-16 15:43:40 +08:00
|
|
|
if (parsePreAggregatedLBRSamples()) {
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
errs() << "PERF2BOLT: failed to parse samples\n";
|
2018-07-18 09:31:46 +08:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-08 14:00:29 +08:00
|
|
|
/// Write the aggregated profile to \p OutputFilename in the text format
/// described in the body, for consumption by autofdo tools.
///
/// Addresses are emitted in hexadecimal relative to BC->FirstAllocAddress;
/// addresses below that base are written as 0.
///
/// \returns the error from opening the file or from writing to it, or a
///          default-constructed (success) error code.
std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
  outs() << "PERF2BOLT: writing data for autofdo tools...\n";
  NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
                     TimerGroupDesc, opts::TimeAggregator);

  std::error_code EC;
  raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
  if (EC)
    return EC;

  // Format:
  // number of unique traces
  // from_1-to_1:count_1
  // from_2-to_2:count_2
  // ......
  // from_n-to_n:count_n
  // number of unique sample addresses
  // addr_1:count_1
  // addr_2:count_2
  // ......
  // addr_n:count_n
  // number of unique LBR entries
  // src_1->dst_1:count_1
  // src_2->dst_2:count_2
  // ......
  // src_n->dst_n:count_n

  const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress;

  // AutoFDO addresses are relative to the first allocated loadable program
  // segment
  auto filterAddress = [FirstAllocAddress](uint64_t Address) -> uint64_t {
    if (Address < FirstAllocAddress)
      return 0;
    return Address - FirstAllocAddress;
  };

  OutFile << FallthroughLBRs.size() << "\n";
  for (const auto &AggrLBR : FallthroughLBRs) {
    const Trace &LBRTrace = AggrLBR.first;
    const FTInfo &Info = AggrLBR.second;
    OutFile << Twine::utohexstr(filterAddress(LBRTrace.From)) << "-"
            << Twine::utohexstr(filterAddress(LBRTrace.To)) << ":"
            << (Info.InternCount + Info.ExternCount) << "\n";
  }

  OutFile << BasicSamples.size() << "\n";
  for (const auto &Sample : BasicSamples) {
    uint64_t PC = Sample.first;
    uint64_t HitCount = Sample.second;
    OutFile << Twine::utohexstr(filterAddress(PC)) << ":" << HitCount << "\n";
  }

  OutFile << BranchLBRs.size() << "\n";
  for (const auto &AggrLBR : BranchLBRs) {
    const Trace &LBRTrace = AggrLBR.first;
    const BranchInfo &Info = AggrLBR.second;
    OutFile << Twine::utohexstr(filterAddress(LBRTrace.From)) << "->"
            << Twine::utohexstr(filterAddress(LBRTrace.To)) << ":"
            << Info.TakenCount << "\n";
  }

  // Report write failures through the return value. An error left pending on
  // the stream would otherwise be raised as a fatal error when OutFile is
  // destroyed.
  OutFile.flush();
  if (OutFile.has_error()) {
    EC = OutFile.error();
    OutFile.clear_error();
    return EC;
  }

  outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
         << BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
         << " unique branches to " << OutputFilename << "\n";

  return std::error_code();
}
|
|
|
|
|
2019-09-04 13:24:06 +08:00
|
|
|
void DataAggregator::filterBinaryMMapInfo() {
|
|
|
|
if (opts::FilterPID) {
|
|
|
|
auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
|
|
|
|
if (MMapInfoIter != BinaryMMapInfo.end()) {
|
2021-04-08 15:19:26 +08:00
|
|
|
MMapInfo MMap = MMapInfoIter->second;
|
2019-09-04 13:24:06 +08:00
|
|
|
BinaryMMapInfo.clear();
|
|
|
|
BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap));
|
|
|
|
} else {
|
|
|
|
if (errs().has_colors())
|
|
|
|
errs().changeColor(raw_ostream::RED);
|
|
|
|
errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
|
2021-12-15 08:52:51 +08:00
|
|
|
<< opts::FilterPID << "\""
|
|
|
|
<< " for binary \"" << BC->getFilename() << "\".";
|
2019-09-04 13:24:06 +08:00
|
|
|
assert(!BinaryMMapInfo.empty() && "No memory map for matching binary");
|
|
|
|
errs() << " Profile for the following process is available:\n";
|
2021-12-29 10:29:54 +08:00
|
|
|
for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
|
2020-05-08 14:00:29 +08:00
|
|
|
outs() << " " << MMI.second.PID
|
|
|
|
<< (MMI.second.Forked ? " (forked)\n" : "\n");
|
2021-12-29 10:29:54 +08:00
|
|
|
|
2019-09-04 13:24:06 +08:00
|
|
|
if (errs().has_colors())
|
|
|
|
errs().resetColor();
|
|
|
|
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-08 14:00:29 +08:00
|
|
|
Error DataAggregator::preprocessProfile(BinaryContext &BC) {
|
2017-09-02 09:13:51 +08:00
|
|
|
this->BC = &BC;
|
|
|
|
|
2019-01-16 15:43:40 +08:00
|
|
|
if (opts::ReadPreAggregated) {
|
|
|
|
parsePreAggregated();
|
2020-05-08 14:00:29 +08:00
|
|
|
return Error::success();
|
|
|
|
}
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
if (Optional<StringRef> FileBuildID = BC.getFileBuildID()) {
|
2020-05-08 14:00:29 +08:00
|
|
|
outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
|
|
|
|
processFileBuildID(*FileBuildID);
|
|
|
|
} else {
|
|
|
|
errs() << "BOLT-WARNING: build-id will not be checked because we could "
|
|
|
|
"not read one from input binary\n";
|
2019-01-16 15:43:40 +08:00
|
|
|
}
|
2018-07-18 09:31:46 +08:00
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
auto prepareToParse = [&](StringRef Name, PerfProcessInfo &Process) {
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
std::string Error;
|
|
|
|
outs() << "PERF2BOLT: waiting for perf " << Name
|
|
|
|
<< " collection to finish...\n";
|
2021-04-08 15:19:26 +08:00
|
|
|
sys::ProcessInfo PI = sys::Wait(Process.PI, 0, true, &Error);
|
2017-09-02 09:13:51 +08:00
|
|
|
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
if (!Error.empty()) {
|
|
|
|
errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
|
|
|
|
deleteTempFiles();
|
|
|
|
exit(1);
|
|
|
|
}
|
2017-09-02 09:13:51 +08:00
|
|
|
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
if (PI.ReturnCode != 0) {
|
|
|
|
ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB =
|
2021-12-15 08:52:51 +08:00
|
|
|
MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data());
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
StringRef ErrBuf = (*ErrorMB)->getBuffer();
|
2017-09-02 09:13:51 +08:00
|
|
|
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
|
|
|
|
errs() << ErrBuf;
|
|
|
|
deleteTempFiles();
|
|
|
|
exit(1);
|
|
|
|
}
|
2017-09-02 09:13:51 +08:00
|
|
|
|
|
|
|
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
|
2021-12-15 08:52:51 +08:00
|
|
|
MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data());
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
if (std::error_code EC = MB.getError()) {
|
|
|
|
errs() << "Cannot open " << Process.StdoutPath.data() << ": "
|
|
|
|
<< EC.message() << "\n";
|
|
|
|
deleteTempFiles();
|
|
|
|
exit(1);
|
|
|
|
}
|
2017-09-02 09:13:51 +08:00
|
|
|
|
2021-06-24 06:10:47 +08:00
|
|
|
FileBuf = std::move(*MB);
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
ParsingBuf = FileBuf->getBuffer();
|
|
|
|
Col = 0;
|
|
|
|
Line = 1;
|
|
|
|
};
|
|
|
|
|
Generate heatmap for linux kernel
Summary:
This diff handles several challenges related to heatmap generation for Linux kernel (vmlinux elf file):
- If the input binary elf file contains the section `__ksymtab`, this diff assumes that this is the linux kernel `vmlinux` file and enables an extra flag `LinuxKernelMode`
- In `LinuxKernelMode`, we only support heat map generation right now, therefore it ensures that current BOLT mode is heat map generation. Otherwise, it exits with error.
- For some Linux symbol and section combinations, BOLT may not be able to find section for symbol (specially symbols that specifies the end of some section). For such cases, we show an warning message without exiting which was the previous behavior.
- Linux kernel elf file does not contain dynamic section, therefore, we don't exit when no dynamic section is found for linux kernel binary.
- Current `ParseMMap` logic does not work with linux kernel. MMap entries for linux kernel uses `PERF_RECORD_MMAP` format instead of typical `PERF_RECORD_MMAP2` format. Since linux kernel address mapping is absolute (same as specified in the ELF file), we avoid calling `ParseMMap` in linux kernel mode.
- Linux kernel entries are registered with PID -1, therefore `BinaryMMapInfo` lookup is not required for linux kernel entries. Similarly, `adjustLBR` is also not required.
- Default max address in linux kernel mode is highest unsigned 64-bit integer instead of current 4GBs.
- Added another new parameter for heatmap, `MinAddress`, in case of Linux kernel mode which is `KernelBaseAddress`, otherwise, it is 0. While registering Heatmap sample counts from LBR entries, any address lower than this `MinAddress` is ignored.
- `IgnoreInterruptLBR` is disabled in linux kernel mode to ensure that kernel entries are processed
Currently, linux kernel heat map also include heat map for Linux kernel modules that are not part of vmlinux elf file. This is intentional to identify other potential optimization opportunities. If reviewers think, those modules should be omitted, I will disable those modules based on highest end address of a vmlinux elf section.
(cherry picked from FBD21992765)
2020-06-11 14:00:39 +08:00
|
|
|
if (opts::LinuxKernelMode) {
|
|
|
|
// Current MMap parsing logic does not work with linux kernel.
|
|
|
|
// MMap entries for linux kernel uses PERF_RECORD_MMAP
|
|
|
|
// format instead of typical PERF_RECORD_MMAP2 format.
|
|
|
|
// Since linux kernel address mapping is absolute (same as
|
|
|
|
// in the ELF file), we avoid parsing MMap in linux kernel mode.
|
|
|
|
// While generating optimized linux kernel binary, we may need
|
|
|
|
// to parse MMap entries.
|
|
|
|
|
|
|
|
// In linux kernel mode, we analyze and optimize
|
|
|
|
// all linux kernel binary instructions, irrespective
|
|
|
|
// of whether they are due to system calls or due to
|
|
|
|
// interrupts. Therefore, we cannot ignore interrupt
|
|
|
|
// in Linux kernel mode.
|
|
|
|
opts::IgnoreInterruptLBR = false;
|
|
|
|
} else {
|
|
|
|
prepareToParse("mmap events", MMapEventsPPI);
|
2021-12-29 10:29:54 +08:00
|
|
|
if (parseMMapEvents())
|
Generate heatmap for linux kernel
Summary:
This diff handles several challenges related to heatmap generation for Linux kernel (vmlinux elf file):
- If the input binary elf file contains the section `__ksymtab`, this diff assumes that this is the linux kernel `vmlinux` file and enables an extra flag `LinuxKernelMode`
- In `LinuxKernelMode`, we only support heat map generation right now, therefore it ensures that current BOLT mode is heat map generation. Otherwise, it exits with error.
- For some Linux symbol and section combinations, BOLT may not be able to find section for symbol (specially symbols that specifies the end of some section). For such cases, we show an warning message without exiting which was the previous behavior.
- Linux kernel elf file does not contain dynamic section, therefore, we don't exit when no dynamic section is found for linux kernel binary.
- Current `ParseMMap` logic does not work with linux kernel. MMap entries for linux kernel uses `PERF_RECORD_MMAP` format instead of typical `PERF_RECORD_MMAP2` format. Since linux kernel address mapping is absolute (same as specified in the ELF file), we avoid calling `ParseMMap` in linux kernel mode.
- Linux kernel entries are registered with PID -1, therefore `BinaryMMapInfo` lookup is not required for linux kernel entries. Similarly, `adjustLBR` is also not required.
- Default max address in linux kernel mode is highest unsigned 64-bit integer instead of current 4GBs.
- Added another new parameter for heatmap, `MinAddress`, in case of Linux kernel mode which is `KernelBaseAddress`, otherwise, it is 0. While registering Heatmap sample counts from LBR entries, any address lower than this `MinAddress` is ignored.
- `IgnoreInterruptLBR` is disabled in linux kernel mode to ensure that kernel entries are processed
Currently, linux kernel heat map also include heat map for Linux kernel modules that are not part of vmlinux elf file. This is intentional to identify other potential optimization opportunities. If reviewers think, those modules should be omitted, I will disable those modules based on highest end address of a vmlinux elf section.
(cherry picked from FBD21992765)
2020-06-11 14:00:39 +08:00
|
|
|
errs() << "PERF2BOLT: failed to parse mmap events\n";
|
2017-09-02 09:13:51 +08:00
|
|
|
}
|
|
|
|
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
prepareToParse("task events", TaskEventsPPI);
|
2021-12-29 10:29:54 +08:00
|
|
|
if (parseTaskEvents())
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
errs() << "PERF2BOLT: failed to parse task events\n";
|
2017-09-02 09:13:51 +08:00
|
|
|
|
2019-09-04 13:24:06 +08:00
|
|
|
filterBinaryMMapInfo();
|
2019-01-16 15:43:40 +08:00
|
|
|
prepareToParse("events", MainEventsPPI);
|
2019-02-06 07:28:19 +08:00
|
|
|
|
|
|
|
if (opts::HeatmapMode) {
|
2021-04-08 15:19:26 +08:00
|
|
|
if (std::error_code EC = printLBRHeatMap()) {
|
2019-02-06 07:28:19 +08:00
|
|
|
errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
exit(0);
|
|
|
|
}
|
|
|
|
|
2018-04-14 02:18:46 +08:00
|
|
|
if ((!opts::BasicAggregation && parseBranchEvents()) ||
|
2021-12-29 10:29:54 +08:00
|
|
|
(opts::BasicAggregation && parseBasicEvents()))
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
errs() << "PERF2BOLT: failed to parse samples\n";
|
2017-10-17 04:09:43 +08:00
|
|
|
|
2019-01-23 09:21:45 +08:00
|
|
|
// We can finish early if the goal is just to generate data for autofdo
|
|
|
|
if (opts::WriteAutoFDOData) {
|
2021-12-29 10:29:54 +08:00
|
|
|
if (std::error_code EC = writeAutoFDOData(opts::OutputFilename))
|
2019-01-23 09:21:45 +08:00
|
|
|
errs() << "Error writing autofdo data to file: " << EC.message() << "\n";
|
2021-12-29 10:29:54 +08:00
|
|
|
|
2019-01-23 09:21:45 +08:00
|
|
|
deleteTempFiles();
|
|
|
|
exit(0);
|
|
|
|
}
|
|
|
|
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
// Special handling for memory events
|
|
|
|
std::string Error;
|
2021-04-08 15:19:26 +08:00
|
|
|
sys::ProcessInfo PI = sys::Wait(MemEventsPPI.PI, 0, true, &Error);
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
if (PI.ReturnCode != 0) {
|
2017-10-17 04:09:43 +08:00
|
|
|
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
|
2021-12-15 08:52:51 +08:00
|
|
|
MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StderrPath.data());
|
2017-10-17 04:09:43 +08:00
|
|
|
StringRef ErrBuf = (*MB)->getBuffer();
|
|
|
|
|
|
|
|
deleteTempFiles();
|
|
|
|
|
2017-11-29 01:57:21 +08:00
|
|
|
Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
|
|
|
|
"Cannot print 'addr' field.");
|
2017-10-17 04:09:43 +08:00
|
|
|
if (!NoData.match(ErrBuf)) {
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
|
2017-10-17 04:09:43 +08:00
|
|
|
errs() << ErrBuf;
|
|
|
|
exit(1);
|
|
|
|
}
|
2020-05-08 14:00:29 +08:00
|
|
|
return Error::success();
|
2017-10-17 04:09:43 +08:00
|
|
|
}
|
|
|
|
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
|
2021-12-15 08:52:51 +08:00
|
|
|
MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StdoutPath.data());
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
if (std::error_code EC = MB.getError()) {
|
|
|
|
errs() << "Cannot open " << MemEventsPPI.StdoutPath.data() << ": "
|
2017-10-17 04:09:43 +08:00
|
|
|
<< EC.message() << "\n";
|
|
|
|
deleteTempFiles();
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2021-06-24 06:10:47 +08:00
|
|
|
FileBuf = std::move(*MB);
|
2017-10-17 04:09:43 +08:00
|
|
|
ParsingBuf = FileBuf->getBuffer();
|
|
|
|
Col = 0;
|
|
|
|
Line = 1;
|
2021-12-29 10:29:54 +08:00
|
|
|
if (const std::error_code EC = parseMemEvents())
|
2021-12-15 08:52:51 +08:00
|
|
|
errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
|
|
|
|
<< '\n';
|
2017-09-02 09:13:51 +08:00
|
|
|
|
2017-10-17 04:09:43 +08:00
|
|
|
deleteTempFiles();
|
2020-05-08 14:00:29 +08:00
|
|
|
|
|
|
|
return Error::success();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Process the collected profile, convert branch data for every function in
/// \p BC and, when opts::AggregateOnly is set, write the aggregated file to
/// opts::OutputFilename.
///
/// \returns Error::success(); a failure to write the aggregated file is
///          passed to report_error().
Error DataAggregator::readProfile(BinaryContext &BC) {
  processProfile(BC);

  for (auto &Entry : BC.getBinaryFunctions())
    convertBranchData(Entry.second);

  if (!opts::AggregateOnly)
    return Error::success();

  if (std::error_code WriteEC = writeAggregatedFile(opts::OutputFilename))
    report_error("cannot create output data file", WriteEC);

  return Error::success();
}
|
|
|
|
|
|
|
|
bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
|
|
|
|
return Function.hasProfileAvailable();
|
2019-01-16 15:43:40 +08:00
|
|
|
}
|
2017-11-29 01:57:21 +08:00
|
|
|
|
2019-04-04 06:52:01 +08:00
|
|
|
/// Run the event processing that matches the selected aggregation mode, then
/// the memory-event processing, flag every function that received branch data
/// as profiled, and finally drop the intermediate sample containers.
void DataAggregator::processProfile(BinaryContext &BC) {
  if (opts::ReadPreAggregated) {
    processPreAggregated();
  } else if (opts::BasicAggregation) {
    processBasicEvents();
  } else {
    processBranchEvents();
  }

  processMemEvents();

  // Mark all functions with registered events as having a valid profile.
  // The profile kind depends only on the aggregation mode, so pick it once.
  const auto ProfileFlags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
                                                   : BinaryFunction::PF_LBR;
  for (auto &Entry : BC.getBinaryFunctions()) {
    BinaryFunction &Function = Entry.second;
    if (!getBranchData(Function))
      continue;
    Function.markProfiled(ProfileFlags);
  }

  // Release intermediate storage.
  clear(BranchLBRs);
  clear(FallthroughLBRs);
  clear(AggregatedLBRs);
  clear(BasicSamples);
  clear(MemSamples);
}
|
|
|
|
|
|
|
|
/// Look up the function covering \p Address.
///
/// Returns nullptr when the binary context reports that it does not contain
/// the address; otherwise forwards to the binary context lookup with
/// CheckPastEnd disabled and UseMaxSize enabled.
BinaryFunction *
DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
  return BC->containsAddress(Address)
             ? BC->getBinaryFunctionContainingAddress(
                   Address, /*CheckPastEnd=*/false, /*UseMaxSize=*/true)
             : nullptr;
}
|
|
|
|
|
2019-04-13 08:33:46 +08:00
|
|
|
/// Choose the name under which samples for \p Func are recorded.
///
/// Without BAT this is simply Func.getOneName(). With BAT, if a parent address
/// is found for \p Func, \p Count is added to NumColdSamples and the function
/// containing that parent address (when one exists) is used for naming
/// instead. Among the candidate names, the first one containing at least two
/// '/' characters is preferred.
StringRef DataAggregator::getLocationName(BinaryFunction &Func,
                                          uint64_t Count) {
  if (!BAT)
    return Func.getOneName();

  const BinaryFunction *NamedFunc = &Func;
  const uint64_t ParentAddr = BAT->fetchParentAddress(Func.getAddress());
  if (ParentAddr) {
    NumColdSamples += Count;
    if (BinaryFunction *Parent = getBinaryFunctionContainingAddress(ParentAddr))
      NamedFunc = Parent;
  }

  // If it is a local function, prefer the name containing the file name where
  // the local function was declared.
  for (StringRef Candidate : NamedFunc->getNames()) {
    // Only accept names matching the pattern Symbol/FileName/1, i.e. names
    // with a second '/' somewhere after the first one.
    const size_t FirstSlash = Candidate.find('/');
    if (FirstSlash != StringRef::npos &&
        Candidate.find('/', FirstSlash + 1) != StringRef::npos)
      return Candidate;
  }
  return NamedFunc->getOneName();
}
|
|
|
|
|
2019-01-23 09:21:45 +08:00
|
|
|
/// Add \p Count samples at \p Address to the sample data kept for \p Func,
/// creating that entry on first use. The address is first made relative to
/// the function start and, when BAT is present, translated through it.
/// Always returns true.
bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address,
                              uint64_t Count) {
  auto SampleIt = NamesToSamples.find(Func.getOneName());
  if (SampleIt == NamesToSamples.end()) {
    StringRef LocName = getLocationName(Func, Count);
    SampleIt =
        NamesToSamples
            .insert(std::make_pair(
                Func.getOneName(),
                FuncSampleData(LocName, FuncSampleData::ContainerTy())))
            .first;
  }

  uint64_t Offset = Address - Func.getAddress();
  if (BAT)
    Offset = BAT->translate(Func, Offset, /*IsBranchSrc=*/false);

  SampleIt->second.bumpCount(Offset, Count);
  return true;
}
|
|
|
|
|
2018-07-18 09:31:46 +08:00
|
|
|
/// Record \p Count occurrences (\p Mispreds of them mispredicted) of a branch
/// from \p From to \p To, both located inside \p Func.
///
/// Branch data for the function is created on first use. Addresses are
/// converted to offsets from the function start and, when BAT is present,
/// translated through it before the counters are bumped. Always returns true.
bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
                                   uint64_t To, uint64_t Count,
                                   uint64_t Mispreds) {
  FuncBranchData *BranchData = getBranchData(Func);
  if (!BranchData) {
    BranchData = &NamesToBranches[Func.getOneName()];
    BranchData->Name = getLocationName(Func, Count);
    setBranchData(Func, BranchData);
  }

  const uint64_t FuncStart = Func.getAddress();
  From -= FuncStart;
  To -= FuncStart;
  LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: " << Func.getPrintName()
                    << " @ " << Twine::utohexstr(From) << " -> "
                    << Func.getPrintName() << " @ " << Twine::utohexstr(To)
                    << '\n');

  if (BAT) {
    From = BAT->translate(Func, From, /*IsBranchSrc=*/true);
    To = BAT->translate(Func, To, /*IsBranchSrc=*/false);
    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: "
                      << Func.getPrintName() << " @ " << Twine::utohexstr(From)
                      << " -> " << Func.getPrintName() << " @ "
                      << Twine::utohexstr(To) << '\n');
  }

  BranchData->bumpBranchCount(From, To, Count, Mispreds);
  return true;
}
|
|
|
|
|
|
|
|
bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
|
2018-07-18 09:31:46 +08:00
|
|
|
BinaryFunction *ToFunc, uint64_t From,
|
|
|
|
uint64_t To, uint64_t Count,
|
|
|
|
uint64_t Mispreds) {
|
2021-05-14 01:50:47 +08:00
|
|
|
FuncBranchData *FromAggrData = nullptr;
|
|
|
|
FuncBranchData *ToAggrData = nullptr;
|
2017-09-02 09:13:51 +08:00
|
|
|
StringRef SrcFunc;
|
|
|
|
StringRef DstFunc;
|
|
|
|
if (FromFunc) {
|
2019-04-13 08:33:46 +08:00
|
|
|
SrcFunc = getLocationName(*FromFunc, Count);
|
2020-05-08 14:00:29 +08:00
|
|
|
FromAggrData = getBranchData(*FromFunc);
|
2017-09-02 09:13:51 +08:00
|
|
|
if (!FromAggrData) {
|
2020-05-08 14:00:29 +08:00
|
|
|
FromAggrData = &NamesToBranches[FromFunc->getOneName()];
|
2017-09-02 09:13:51 +08:00
|
|
|
FromAggrData->Name = SrcFunc;
|
2020-05-08 14:00:29 +08:00
|
|
|
setBranchData(*FromFunc, FromAggrData);
|
2017-09-02 09:13:51 +08:00
|
|
|
}
|
|
|
|
From -= FromFunc->getAddress();
|
2019-04-13 08:33:46 +08:00
|
|
|
if (BAT)
|
|
|
|
From = BAT->translate(*FromFunc, From, /*IsBranchSrc=*/true);
|
2017-11-29 01:57:21 +08:00
|
|
|
|
2020-05-08 14:00:29 +08:00
|
|
|
recordExit(*FromFunc, From, Mispreds, Count);
|
2017-09-02 09:13:51 +08:00
|
|
|
}
|
|
|
|
if (ToFunc) {
|
2019-04-13 08:33:46 +08:00
|
|
|
DstFunc = getLocationName(*ToFunc, 0);
|
2020-05-08 14:00:29 +08:00
|
|
|
ToAggrData = getBranchData(*ToFunc);
|
2017-09-02 09:13:51 +08:00
|
|
|
if (!ToAggrData) {
|
2020-05-08 14:00:29 +08:00
|
|
|
ToAggrData = &NamesToBranches[ToFunc->getOneName()];
|
2017-09-02 09:13:51 +08:00
|
|
|
ToAggrData->Name = DstFunc;
|
2020-05-08 14:00:29 +08:00
|
|
|
setBranchData(*ToFunc, ToAggrData);
|
2017-09-02 09:13:51 +08:00
|
|
|
}
|
|
|
|
To -= ToFunc->getAddress();
|
2019-04-13 08:33:46 +08:00
|
|
|
if (BAT)
|
|
|
|
To = BAT->translate(*ToFunc, To, /*IsBranchSrc=*/false);
|
2017-11-29 01:57:21 +08:00
|
|
|
|
2020-05-08 14:00:29 +08:00
|
|
|
recordEntry(*ToFunc, To, Mispreds, Count);
|
2017-09-02 09:13:51 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (FromAggrData)
|
|
|
|
FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To),
|
2018-07-18 09:31:46 +08:00
|
|
|
Count, Mispreds);
|
2017-09-02 09:13:51 +08:00
|
|
|
if (ToAggrData)
|
|
|
|
ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To,
|
2018-07-18 09:31:46 +08:00
|
|
|
Count, Mispreds);
|
2017-09-02 09:13:51 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-07-18 09:31:46 +08:00
|
|
|
bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
|
|
|
|
uint64_t Mispreds) {
|
2021-04-08 15:19:26 +08:00
|
|
|
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From);
|
|
|
|
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To);
|
2017-09-02 09:13:51 +08:00
|
|
|
if (!FromFunc && !ToFunc)
|
|
|
|
return false;
|
|
|
|
|
2017-11-29 01:57:21 +08:00
|
|
|
if (FromFunc == ToFunc) {
|
2020-05-08 14:00:29 +08:00
|
|
|
recordBranch(*FromFunc, From - FromFunc->getAddress(),
|
|
|
|
To - FromFunc->getAddress(), Count, Mispreds);
|
2018-07-18 09:31:46 +08:00
|
|
|
return doIntraBranch(*FromFunc, From, To, Count, Mispreds);
|
2017-11-29 01:57:21 +08:00
|
|
|
}
|
2017-09-02 09:13:51 +08:00
|
|
|
|
2018-07-18 09:31:46 +08:00
|
|
|
return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds);
|
2017-09-02 09:13:51 +08:00
|
|
|
}
|
|
|
|
|
2018-07-18 09:31:46 +08:00
|
|
|
bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
|
|
|
|
uint64_t Count) {
|
2021-04-08 15:19:26 +08:00
|
|
|
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To);
|
|
|
|
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
|
2017-09-02 09:13:51 +08:00
|
|
|
if (!FromFunc || !ToFunc) {
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(
|
2019-04-13 08:33:46 +08:00
|
|
|
dbgs() << "Out of range trace starting in " << FromFunc->getPrintName()
|
|
|
|
<< " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
|
|
|
|
<< " and ending in " << ToFunc->getPrintName() << " @ "
|
|
|
|
<< ToFunc->getPrintName() << " @ "
|
|
|
|
<< Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
|
2018-07-18 09:31:46 +08:00
|
|
|
NumLongRangeTraces += Count;
|
2017-09-02 09:13:51 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
if (FromFunc != ToFunc) {
|
2018-07-18 09:31:46 +08:00
|
|
|
NumInvalidTraces += Count;
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(
|
|
|
|
dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
|
|
|
|
<< " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
|
|
|
|
<< " and ending in " << ToFunc->getPrintName() << " @ "
|
|
|
|
<< ToFunc->getPrintName() << " @ "
|
|
|
|
<< Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
|
2017-09-02 09:13:51 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
Optional<BoltAddressTranslation::FallthroughListTy> FTs =
|
|
|
|
BAT ? BAT->getFallthroughsInTrace(*FromFunc, First.To, Second.From)
|
|
|
|
: getFallthroughsInTrace(*FromFunc, First, Second, Count);
|
2017-09-02 09:13:51 +08:00
|
|
|
if (!FTs) {
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(
|
|
|
|
dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
|
|
|
|
<< " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
|
|
|
|
<< " and ending in " << ToFunc->getPrintName() << " @ "
|
|
|
|
<< ToFunc->getPrintName() << " @ "
|
|
|
|
<< Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
|
2018-07-18 09:31:46 +08:00
|
|
|
NumInvalidTraces += Count;
|
2017-09-02 09:13:51 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for "
|
|
|
|
<< FromFunc->getPrintName() << ":"
|
|
|
|
<< Twine::utohexstr(First.To) << " to "
|
|
|
|
<< Twine::utohexstr(Second.From) << ".\n");
|
2021-12-29 10:29:54 +08:00
|
|
|
for (const std::pair<uint64_t, uint64_t> &Pair : *FTs)
|
2018-07-18 09:31:46 +08:00
|
|
|
doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(),
|
|
|
|
Pair.second + FromFunc->getAddress(), Count, false);
|
2017-09-02 09:13:51 +08:00
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-05-08 14:00:29 +08:00
|
|
|
bool DataAggregator::recordTrace(
|
|
|
|
BinaryFunction &BF,
|
|
|
|
const LBREntry &FirstLBR,
|
|
|
|
const LBREntry &SecondLBR,
|
|
|
|
uint64_t Count,
|
|
|
|
SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches) const {
|
2021-04-08 15:19:26 +08:00
|
|
|
BinaryContext &BC = BF.getBinaryContext();
|
2020-05-08 14:00:29 +08:00
|
|
|
|
|
|
|
if (!BF.isSimple())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
assert(BF.hasCFG() && "can only record traces in CFG state");
|
|
|
|
|
|
|
|
// Offsets of the trace within this function.
|
2021-04-08 15:19:26 +08:00
|
|
|
const uint64_t From = FirstLBR.To - BF.getAddress();
|
|
|
|
const uint64_t To = SecondLBR.From - BF.getAddress();
|
2020-05-08 14:00:29 +08:00
|
|
|
|
|
|
|
if (From > To)
|
|
|
|
return false;
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
|
|
|
|
BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
|
2020-05-08 14:00:29 +08:00
|
|
|
|
|
|
|
if (!FromBB || !ToBB)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Adjust FromBB if the first LBR is a return from the last instruction in
|
|
|
|
// the previous block (that instruction should be a call).
|
|
|
|
if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
|
|
|
|
!FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
|
2021-04-08 15:19:26 +08:00
|
|
|
BinaryBasicBlock *PrevBB = BF.BasicBlocksLayout[FromBB->getIndex() - 1];
|
2020-05-08 14:00:29 +08:00
|
|
|
if (PrevBB->getSuccessor(FromBB->getLabel())) {
|
2021-04-08 15:19:26 +08:00
|
|
|
const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
|
2021-12-29 10:29:54 +08:00
|
|
|
if (Instr && BC.MIB->isCall(*Instr))
|
2020-05-08 14:00:29 +08:00
|
|
|
FromBB = PrevBB;
|
2021-12-29 10:29:54 +08:00
|
|
|
else
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
|
|
|
|
<< '\n');
|
2020-05-08 14:00:29 +08:00
|
|
|
} else {
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
|
2020-05-08 14:00:29 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Fill out information for fall-through edges. The From and To could be
|
|
|
|
// within the same basic block, e.g. when two call instructions are in the
|
|
|
|
// same block. In this case we skip the processing.
|
2021-12-29 10:29:54 +08:00
|
|
|
if (FromBB == ToBB)
|
2020-05-08 14:00:29 +08:00
|
|
|
return true;
|
|
|
|
|
|
|
|
// Process blocks in the original layout order.
|
2021-04-08 15:19:26 +08:00
|
|
|
BinaryBasicBlock *BB = BF.BasicBlocksLayout[FromBB->getIndex()];
|
2020-05-08 14:00:29 +08:00
|
|
|
assert(BB == FromBB && "index mismatch");
|
|
|
|
while (BB != ToBB) {
|
2021-04-08 15:19:26 +08:00
|
|
|
BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[BB->getIndex() + 1];
|
2020-05-08 14:00:29 +08:00
|
|
|
assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");
|
|
|
|
|
|
|
|
// Check for bad LBRs.
|
|
|
|
if (!BB->getSuccessor(NextBB->getLabel())) {
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
|
|
|
|
<< " " << FirstLBR << '\n'
|
|
|
|
<< " " << SecondLBR << '\n');
|
2020-05-08 14:00:29 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2021-12-15 08:52:51 +08:00
|
|
|
// Record fall-through jumps
|
2021-04-08 15:19:26 +08:00
|
|
|
BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*NextBB);
|
2020-05-08 14:00:29 +08:00
|
|
|
BI.Count += Count;
|
|
|
|
|
|
|
|
if (Branches) {
|
2021-04-08 15:19:26 +08:00
|
|
|
const MCInst *Instr = BB->getLastNonPseudoInstr();
|
2021-05-14 01:50:47 +08:00
|
|
|
uint64_t Offset = 0;
|
2021-12-29 10:29:54 +08:00
|
|
|
if (Instr)
|
2021-08-04 08:53:32 +08:00
|
|
|
Offset = BC.MIB->getOffsetWithDefault(*Instr, 0);
|
2021-12-29 10:29:54 +08:00
|
|
|
else
|
2020-05-08 14:00:29 +08:00
|
|
|
Offset = BB->getOffset();
|
2021-12-29 10:29:54 +08:00
|
|
|
|
2021-05-08 09:43:25 +08:00
|
|
|
Branches->emplace_back(Offset, NextBB->getOffset());
|
2020-05-08 14:00:29 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
BB = NextBB;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Collect the fall-through edges of the trace between \p FirstLBR and
/// \p SecondLBR in \p BF by running recordTrace() with an output list.
///
/// Returns the collected (source, destination) offset pairs, or an empty
/// Optional when recordTrace() fails.
Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
                                       const LBREntry &FirstLBR,
                                       const LBREntry &SecondLBR,
                                       uint64_t Count) const {
  SmallVector<std::pair<uint64_t, uint64_t>, 16> Fallthroughs;

  const bool Recorded =
      recordTrace(BF, FirstLBR, SecondLBR, Count, &Fallthroughs);
  if (Recorded)
    return Fallthroughs;

  return NoneType();
}
|
|
|
|
|
|
|
|
/// Account for \p Count entries into \p BF at offset \p To.
///
/// Returns false when \p To lies beyond the function size. Otherwise the
/// function's execution count is reset to zero if it has no profile yet, an
/// entry at offset 0 bumps the function execution count, and the execution
/// count of the entered block is bumped when that block is the first block
/// (offset 0) or an entry-point block at \p To. \p Mispred is not used.
bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred,
                                 uint64_t Count) const {
  if (To > BF.getSize())
    return false;

  if (!BF.hasProfile())
    BF.ExecutionCount = 0;

  BinaryBasicBlock *EnteredBB = nullptr;
  if (To != 0) {
    // Secondary entry: only count it if the block is a known entry point.
    BinaryBasicBlock *TargetBB = BF.getBasicBlockAtOffset(To);
    if (TargetBB && TargetBB->isEntryPoint())
      EnteredBB = TargetBB;
  } else {
    BF.ExecutionCount += Count;
    EnteredBB = BF.empty() ? nullptr : &BF.front();
  }

  if (!EnteredBB)
    return true;

  EnteredBB->setExecutionCount(EnteredBB->getKnownExecutionCount() + Count);
  return true;
}
|
|
|
|
|
2021-12-15 08:52:51 +08:00
|
|
|
/// Validate an exit from \p BF at offset \p From.
///
/// Returns false for non-simple functions and for offsets beyond the function
/// size. Otherwise resets the execution count to zero when the function has
/// no profile yet and returns true. \p Mispred and \p Count are not used.
bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
                                uint64_t Count) const {
  if (!BF.isSimple())
    return false;
  if (From > BF.getSize())
    return false;

  const bool AlreadyProfiled = BF.hasProfile();
  if (!AlreadyProfiled)
    BF.ExecutionCount = 0;

  return true;
}
|
|
|
|
|
2017-09-02 09:13:51 +08:00
|
|
|
/// Parse one LBR entry: three '/'-terminated fields (From address, To address,
/// misprediction flag) followed by a remainder that runs up to the field
/// separator or end of line and must be at least five characters long.
///
/// The misprediction flag must be a single 'P', 'M' or '-'; only 'M' sets
/// Mispred. A one-time warning is printed the first time '-' is seen.
/// Returns the parsed entry, or an error code when a field cannot be read
/// or is malformed.
ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
  LBREntry Entry;

  // Read one '/'-terminated address field into Out, reporting ErrorMsg when
  // the text is not a valid integer.
  auto ParseAddress = [this](auto &Out,
                             const char *ErrorMsg) -> std::error_code {
    ErrorOr<StringRef> Field = parseString('/');
    if (std::error_code EC = Field.getError())
      return EC;
    StringRef Text = Field.get();
    if (Text.getAsInteger(0, Out)) {
      reportError(ErrorMsg);
      Diag << "Found: " << Text << "\n";
      return make_error_code(llvm::errc::io_error);
    }
    return std::error_code();
  };

  if (std::error_code EC = ParseAddress(
          Entry.From, "expected hexadecimal number with From address"))
    return EC;
  if (std::error_code EC =
          ParseAddress(Entry.To, "expected hexadecimal number with To address"))
    return EC;

  ErrorOr<StringRef> MispredField = parseString('/');
  if (std::error_code EC = MispredField.getError())
    return EC;
  StringRef MispredText = MispredField.get();
  const bool ValidMispred =
      MispredText.size() == 1 &&
      (MispredText[0] == 'P' || MispredText[0] == 'M' || MispredText[0] == '-');
  if (!ValidMispred) {
    reportError("expected single char for mispred bit");
    Diag << "Found: " << MispredText << "\n";
    return make_error_code(llvm::errc::io_error);
  }
  const char MispredFlag = MispredText[0];
  Entry.Mispred = MispredFlag == 'M';

  // Warn only once per run about profiles lacking the misprediction bit.
  static bool MispredWarning = true;
  if (MispredFlag == '-' && MispredWarning) {
    errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
    MispredWarning = false;
  }

  ErrorOr<StringRef> Remainder = parseString(FieldSeparator, true);
  if (std::error_code EC = Remainder.getError())
    return EC;
  if (Remainder.get().size() < 5) {
    reportError("expected rest of LBR entry");
    Diag << "Found: " << Remainder.get() << "\n";
    return make_error_code(llvm::errc::io_error);
  }
  return Entry;
}
|
|
|
|
|
|
|
|
bool DataAggregator::checkAndConsumeFS() {
|
2021-12-29 10:29:54 +08:00
|
|
|
if (ParsingBuf[0] != FieldSeparator)
|
2017-09-02 09:13:51 +08:00
|
|
|
return false;
|
2021-12-29 10:29:54 +08:00
|
|
|
|
2017-09-02 09:13:51 +08:00
|
|
|
ParsingBuf = ParsingBuf.drop_front(1);
|
|
|
|
Col += 1;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
void DataAggregator::consumeRestOfLine() {
|
2021-04-08 15:19:26 +08:00
|
|
|
size_t LineEnd = ParsingBuf.find_first_of('\n');
|
2017-09-02 09:13:51 +08:00
|
|
|
if (LineEnd == StringRef::npos) {
|
|
|
|
ParsingBuf = StringRef();
|
|
|
|
Col = 0;
|
|
|
|
Line += 1;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
ParsingBuf = ParsingBuf.drop_front(LineEnd + 1);
|
|
|
|
Col = 0;
|
|
|
|
Line += 1;
|
|
|
|
}
|
|
|
|
|
2019-01-16 15:43:40 +08:00
|
|
|
ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
|
2017-10-17 04:09:43 +08:00
|
|
|
PerfBranchSample Res;
|
2017-09-02 09:13:51 +08:00
|
|
|
|
2021-12-15 08:52:51 +08:00
|
|
|
while (checkAndConsumeFS()) {
|
|
|
|
}
|
2017-09-02 09:13:51 +08:00
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
|
2017-09-02 09:13:51 +08:00
|
|
|
if (std::error_code EC = PIDRes.getError())
|
|
|
|
return EC;
|
2018-08-15 04:24:44 +08:00
|
|
|
auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
|
Generate heatmap for linux kernel
Summary:
This diff handles several challenges related to heatmap generation for Linux kernel (vmlinux elf file):
- If the input binary elf file contains the section `__ksymtab`, this diff assumes that this is the linux kernel `vmlinux` file and enables an extra flag `LinuxKernelMode`
- In `LinuxKernelMode`, we only support heat map generation right now, therefore it ensures that current BOLT mode is heat map generation. Otherwise, it exits with error.
- For some Linux symbol and section combinations, BOLT may not be able to find section for symbol (specially symbols that specifies the end of some section). For such cases, we show an warning message without exiting which was the previous behavior.
- Linux kernel elf file does not contain dynamic section, therefore, we don't exit when no dynamic section is found for linux kernel binary.
- Current `ParseMMap` logic does not work with linux kernel. MMap entries for linux kernel uses `PERF_RECORD_MMAP` format instead of typical `PERF_RECORD_MMAP2` format. Since linux kernel address mapping is absolute (same as specified in the ELF file), we avoid calling `ParseMMap` in linux kernel mode.
- Linux kernel entries are registered with PID -1, therefore `BinaryMMapInfo` lookup is not required for linux kernel entries. Similarly, `adjustLBR` is also not required.
- Default max address in linux kernel mode is highest unsigned 64-bit integer instead of current 4GBs.
- Added another new parameter for heatmap, `MinAddress`, in case of Linux kernel mode which is `KernelBaseAddress`, otherwise, it is 0. While registering Heatmap sample counts from LBR entries, any address lower than this `MinAddress` is ignored.
- `IgnoreInterruptLBR` is disabled in linux kernel mode to ensure that kernel entries are processed
Currently, linux kernel heat map also include heat map for Linux kernel modules that are not part of vmlinux elf file. This is intentional to identify other potential optimization opportunities. If reviewers think, those modules should be omitted, I will disable those modules based on highest end address of a vmlinux elf section.
(cherry picked from FBD21992765)
2020-06-11 14:00:39 +08:00
|
|
|
if (!opts::LinuxKernelMode && MMapInfoIter == BinaryMMapInfo.end()) {
|
2017-09-02 09:13:51 +08:00
|
|
|
consumeRestOfLine();
|
2019-04-16 07:42:49 +08:00
|
|
|
return make_error_code(errc::no_such_process);
|
2017-09-02 09:13:51 +08:00
|
|
|
}
|
|
|
|
|
2021-12-15 08:52:51 +08:00
|
|
|
while (checkAndConsumeFS()) {
|
|
|
|
}
|
2019-01-23 09:21:45 +08:00
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
|
2019-01-23 09:21:45 +08:00
|
|
|
if (std::error_code EC = PCRes.getError())
|
|
|
|
return EC;
|
|
|
|
Res.PC = PCRes.get();
|
|
|
|
|
|
|
|
if (checkAndConsumeNewLine())
|
|
|
|
return Res;
|
|
|
|
|
2017-09-02 09:13:51 +08:00
|
|
|
while (!checkAndConsumeNewLine()) {
|
2017-10-10 06:52:13 +08:00
|
|
|
checkAndConsumeFS();
|
2017-09-02 09:13:51 +08:00
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
ErrorOr<LBREntry> LBRRes = parseLBREntry();
|
2017-09-02 09:13:51 +08:00
|
|
|
if (std::error_code EC = LBRRes.getError())
|
|
|
|
return EC;
|
2021-04-08 15:19:26 +08:00
|
|
|
LBREntry LBR = LBRRes.get();
|
2019-09-04 01:01:26 +08:00
|
|
|
if (ignoreKernelInterrupt(LBR))
|
|
|
|
continue;
|
2018-08-15 04:24:44 +08:00
|
|
|
if (!BC->HasFixedLoadAddress)
|
|
|
|
adjustLBR(LBR, MMapInfoIter->second);
|
|
|
|
Res.LBR.push_back(LBR);
|
2017-09-02 09:13:51 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return Res;
|
|
|
|
}
|
|
|
|
|
2019-01-16 15:43:40 +08:00
|
|
|
/// Parse one basic sample line: a PID, an event name and an address, followed
/// by end of line.
///
/// A PID without mmap info causes the rest of the line to be skipped and an
/// empty sample (empty event name, address 0) to be returned. The address is
/// adjusted with the process mmap info when the binary has no fixed load
/// address. Field parse failures and a missing end of line are returned as
/// error codes.
ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
  // Swallow any run of field separators preceding the next field.
  auto SkipSeparators = [this] {
    while (checkAndConsumeFS())
      continue;
  };

  SkipSeparators();
  ErrorOr<int64_t> PID = parseNumberField(FieldSeparator, true);
  if (std::error_code EC = PID.getError())
    return EC;

  auto MMapInfoIter = BinaryMMapInfo.find(*PID);
  if (MMapInfoIter == BinaryMMapInfo.end()) {
    consumeRestOfLine();
    return PerfBasicSample{StringRef(), 0};
  }

  SkipSeparators();
  ErrorOr<StringRef> EventName = parseString(FieldSeparator);
  if (std::error_code EC = EventName.getError())
    return EC;

  SkipSeparators();
  ErrorOr<uint64_t> RawAddress = parseHexField(FieldSeparator, true);
  if (std::error_code EC = RawAddress.getError())
    return EC;

  if (!checkAndConsumeNewLine()) {
    reportError("expected end of line");
    return make_error_code(llvm::errc::io_error);
  }

  uint64_t SampleAddress = *RawAddress;
  if (!BC->HasFixedLoadAddress)
    adjustAddress(SampleAddress, MMapInfoIter->second);

  return PerfBasicSample{EventName.get(), SampleAddress};
}
|
|
|
|
|
2019-01-16 15:43:40 +08:00
|
|
|
/// Parse one memory sample line: a PID, an event name, a data address and a
/// PC, followed by end of line.
///
/// Lines whose PID has no mmap info, or whose event name does not contain
/// "mem-loads", are skipped and yield a zeroed sample. The data address is
/// adjusted with the process mmap info when the binary has no fixed load
/// address. Field parse failures and a missing end of line are returned as
/// error codes; a failure on the PC field also skips the rest of the line.
ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
  // Swallow any run of field separators preceding the next field.
  auto SkipSeparators = [this] {
    while (checkAndConsumeFS())
      continue;
  };

  PerfMemSample EmptySample{0, 0};

  SkipSeparators();
  ErrorOr<int64_t> PID = parseNumberField(FieldSeparator, true);
  if (std::error_code EC = PID.getError())
    return EC;

  auto MMapInfoIter = BinaryMMapInfo.find(*PID);
  if (MMapInfoIter == BinaryMMapInfo.end()) {
    consumeRestOfLine();
    return EmptySample;
  }

  SkipSeparators();
  ErrorOr<StringRef> EventName = parseString(FieldSeparator);
  if (std::error_code EC = EventName.getError())
    return EC;
  if (EventName.get().find("mem-loads") == StringRef::npos) {
    consumeRestOfLine();
    return EmptySample;
  }

  SkipSeparators();
  ErrorOr<uint64_t> RawAddress = parseHexField(FieldSeparator);
  if (std::error_code EC = RawAddress.getError())
    return EC;

  SkipSeparators();
  ErrorOr<uint64_t> PC = parseHexField(FieldSeparator, true);
  if (std::error_code EC = PC.getError()) {
    consumeRestOfLine();
    return EC;
  }

  if (!checkAndConsumeNewLine()) {
    reportError("expected end of line");
    return make_error_code(llvm::errc::io_error);
  }

  uint64_t DataAddress = *RawAddress;
  if (!BC->HasFixedLoadAddress)
    adjustAddress(DataAddress, MMapInfoIter->second);

  return PerfMemSample{PC.get(), DataAddress};
}
|
|
|
|
|
2018-07-18 09:31:46 +08:00
|
|
|
/// Parse either a bare hexadecimal offset or a "<id>:<hex offset>" location.
///
/// The next token (up to the first space or newline) is inspected: if it
/// contains a ':' the part before it is read as a string and the part after
/// it as a hex field; otherwise the whole token is read as a hex offset.
/// Field parse failures are returned as error codes.
ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
  auto ParseBareOffset = [this]() -> ErrorOr<Location> {
    ErrorOr<uint64_t> Value = parseHexField(FieldSeparator);
    if (std::error_code EC = Value.getError())
      return EC;
    return Location(Value.get());
  };

  // Peek at the upcoming token to decide which form it has.
  const size_t TokenEnd = ParsingBuf.find_first_of(" \n");
  const bool HasColon =
      TokenEnd != StringRef::npos &&
      ParsingBuf.substr(0, TokenEnd).find_first_of(":") != StringRef::npos;
  if (!HasColon)
    return ParseBareOffset();

  ErrorOr<StringRef> Id = parseString(':');
  if (std::error_code EC = Id.getError())
    return EC;

  ErrorOr<uint64_t> HexOffset = parseHexField(FieldSeparator);
  if (std::error_code EC = HexOffset.getError())
    return EC;

  return Location(true, Id.get(), HexOffset.get());
}
|
|
|
|
|
2019-01-16 15:43:40 +08:00
|
|
|
/// Parse one pre-aggregated line: a type tag ("B", "F" or "f"), a source
/// location, a destination location and a count, followed by a misprediction
/// count for "B" entries only, then end of line.
///
/// An unknown tag, a field that fails to parse, or a missing end of line is
/// returned as an error code.
ErrorOr<DataAggregator::AggregatedLBREntry>
DataAggregator::parseAggregatedLBREntry() {
  // Swallow any run of field separators preceding the next field.
  auto SkipSeparators = [this] {
    while (checkAndConsumeFS())
      continue;
  };

  SkipSeparators();
  ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
  if (std::error_code EC = TypeOrErr.getError())
    return EC;

  const StringRef Tag = TypeOrErr.get();
  auto Type = AggregatedLBREntry::BRANCH;
  if (Tag == "F") {
    Type = AggregatedLBREntry::FT;
  } else if (Tag == "f") {
    Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
  } else if (Tag != "B") {
    reportError("expected B, F or f");
    return make_error_code(llvm::errc::io_error);
  }
  const bool IsBranch = Type == AggregatedLBREntry::BRANCH;

  SkipSeparators();
  ErrorOr<Location> From = parseLocationOrOffset();
  if (std::error_code EC = From.getError())
    return EC;

  SkipSeparators();
  ErrorOr<Location> To = parseLocationOrOffset();
  if (std::error_code EC = To.getError())
    return EC;

  // The count is the last field on the line unless this is a branch entry,
  // which carries one more field.
  SkipSeparators();
  ErrorOr<int64_t> Frequency = parseNumberField(FieldSeparator, !IsBranch);
  if (std::error_code EC = Frequency.getError())
    return EC;

  uint64_t Mispreds = 0;
  if (IsBranch) {
    SkipSeparators();
    ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
    if (std::error_code EC = MispredsOrErr.getError())
      return EC;
    Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
  }

  if (!checkAndConsumeNewLine()) {
    reportError("expected end of line");
    return make_error_code(llvm::errc::io_error);
  }

  return AggregatedLBREntry{From.get(), To.get(),
                            static_cast<uint64_t>(Frequency.get()), Mispreds,
                            Type};
}
|
|
|
|
|
2017-09-02 09:13:51 +08:00
|
|
|
bool DataAggregator::hasData() {
|
|
|
|
if (ParsingBuf.size() == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2019-09-04 01:01:26 +08:00
|
|
|
/// Returns true when opts::IgnoreInterruptLBR is set and at least one endpoint
/// of \p LBR is at or above KernelBaseAddr.
bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
  if (!opts::IgnoreInterruptLBR)
    return false;

  const bool SourceAtOrAboveBase = LBR.From >= KernelBaseAddr;
  const bool TargetAtOrAboveBase = LBR.To >= KernelBaseAddr;
  return SourceAtOrAboveBase || TargetAtOrAboveBase;
}
|
|
|
|
|
2019-02-06 07:28:19 +08:00
|
|
|
std::error_code DataAggregator::printLBRHeatMap() {
|
|
|
|
outs() << "PERF2BOLT: parse branch events...\n";
|
|
|
|
NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
|
|
|
|
TimerGroupDesc, opts::TimeAggregator);
|
|
|
|
|
Generate heatmap for linux kernel
Summary:
This diff handles several challenges related to heatmap generation for Linux kernel (vmlinux elf file):
- If the input binary elf file contains the section `__ksymtab`, this diff assumes that this is the linux kernel `vmlinux` file and enables an extra flag `LinuxKernelMode`
- In `LinuxKernelMode`, we only support heat map generation right now, therefore it ensures that current BOLT mode is heat map generation. Otherwise, it exits with error.
- For some Linux symbol and section combinations, BOLT may not be able to find section for symbol (specially symbols that specifies the end of some section). For such cases, we show an warning message without exiting which was the previous behavior.
- Linux kernel elf file does not contain dynamic section, therefore, we don't exit when no dynamic section is found for linux kernel binary.
- Current `ParseMMap` logic does not work with linux kernel. MMap entries for linux kernel uses `PERF_RECORD_MMAP` format instead of typical `PERF_RECORD_MMAP2` format. Since linux kernel address mapping is absolute (same as specified in the ELF file), we avoid calling `ParseMMap` in linux kernel mode.
- Linux kernel entries are registered with PID -1, therefore `BinaryMMapInfo` lookup is not required for linux kernel entries. Similarly, `adjustLBR` is also not required.
- Default max address in linux kernel mode is highest unsigned 64-bit integer instead of current 4GBs.
- Added another new parameter for heatmap, `MinAddress`, in case of Linux kernel mode which is `KernelBaseAddress`, otherwise, it is 0. While registering Heatmap sample counts from LBR entries, any address lower than this `MinAddress` is ignored.
- `IgnoreInterruptLBR` is disabled in linux kernel mode to ensure that kernel entries are processed
Currently, linux kernel heat map also include heat map for Linux kernel modules that are not part of vmlinux elf file. This is intentional to identify other potential optimization opportunities. If reviewers think, those modules should be omitted, I will disable those modules based on highest end address of a vmlinux elf section.
(cherry picked from FBD21992765)
2020-06-11 14:00:39 +08:00
|
|
|
if (opts::LinuxKernelMode) {
|
|
|
|
opts::HeatmapMaxAddress = 0xffffffffffffffff;
|
|
|
|
opts::HeatmapMinAddress = KernelBaseAddr;
|
|
|
|
}
|
|
|
|
Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
|
|
|
|
opts::HeatmapMaxAddress);
|
2021-05-14 01:50:47 +08:00
|
|
|
uint64_t NumTotalSamples = 0;
|
2019-02-06 07:28:19 +08:00
|
|
|
|
|
|
|
while (hasData()) {
|
2022-04-12 05:39:41 +08:00
|
|
|
if (opts::BasicAggregation) {
|
|
|
|
ErrorOr<PerfBasicSample> SampleRes = parseBasicSample();
|
|
|
|
if (std::error_code EC = SampleRes.getError()) {
|
|
|
|
if (EC == errc::no_such_process)
|
|
|
|
continue;
|
|
|
|
return EC;
|
|
|
|
}
|
|
|
|
PerfBasicSample &Sample = SampleRes.get();
|
|
|
|
HM.registerAddress(Sample.PC);
|
|
|
|
NumTotalSamples++;
|
|
|
|
} else {
|
|
|
|
ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
|
|
|
|
if (std::error_code EC = SampleRes.getError()) {
|
|
|
|
if (EC == errc::no_such_process)
|
|
|
|
continue;
|
|
|
|
return EC;
|
|
|
|
}
|
2019-02-06 07:28:19 +08:00
|
|
|
|
2022-04-12 05:39:41 +08:00
|
|
|
PerfBranchSample &Sample = SampleRes.get();
|
|
|
|
|
|
|
|
// LBRs are stored in reverse execution order. NextLBR refers to the next
|
|
|
|
// executed branch record.
|
|
|
|
const LBREntry *NextLBR = nullptr;
|
|
|
|
for (const LBREntry &LBR : Sample.LBR) {
|
|
|
|
if (NextLBR) {
|
|
|
|
// Record fall-through trace.
|
|
|
|
const uint64_t TraceFrom = LBR.To;
|
|
|
|
const uint64_t TraceTo = NextLBR->From;
|
|
|
|
++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
|
|
|
|
}
|
|
|
|
NextLBR = &LBR;
|
2019-02-06 07:28:19 +08:00
|
|
|
}
|
2022-04-12 05:39:41 +08:00
|
|
|
if (!Sample.LBR.empty()) {
|
|
|
|
HM.registerAddress(Sample.LBR.front().To);
|
|
|
|
HM.registerAddress(Sample.LBR.back().From);
|
|
|
|
}
|
|
|
|
NumTotalSamples += Sample.LBR.size();
|
2019-02-06 07:28:19 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!NumTotalSamples) {
|
2022-04-12 05:39:41 +08:00
|
|
|
if (!opts::BasicAggregation) {
|
|
|
|
errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
|
|
|
|
"Cannot build heatmap. Use -nl for building heatmap from "
|
|
|
|
"basic events.\n";
|
|
|
|
} else {
|
|
|
|
errs() << "HEATMAP-ERROR: no samples detected in profile. "
|
|
|
|
"Cannot build heatmap.";
|
|
|
|
}
|
2019-02-06 07:28:19 +08:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
|
|
|
|
outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";
|
|
|
|
|
|
|
|
outs() << "HEATMAP: building heat map...\n";
|
|
|
|
|
|
|
|
for (const auto &LBR : FallthroughLBRs) {
|
2021-04-08 15:19:26 +08:00
|
|
|
const Trace &Trace = LBR.first;
|
|
|
|
const FTInfo &Info = LBR.second;
|
2019-02-06 07:28:19 +08:00
|
|
|
HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (HM.getNumInvalidRanges())
|
|
|
|
outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';
|
|
|
|
|
|
|
|
if (!HM.size()) {
|
|
|
|
errs() << "HEATMAP-ERROR: no valid traces registered\n";
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2022-02-08 02:30:38 +08:00
|
|
|
HM.print(opts::OutputFilename);
|
|
|
|
if (opts::OutputFilename == "-")
|
|
|
|
HM.printCDF(opts::OutputFilename);
|
2021-12-29 10:29:54 +08:00
|
|
|
else
|
2022-02-08 02:30:38 +08:00
|
|
|
HM.printCDF(opts::OutputFilename + ".csv");
|
2019-02-06 07:28:19 +08:00
|
|
|
|
|
|
|
return std::error_code();
|
|
|
|
}
|
|
|
|
|
2017-10-17 04:09:43 +08:00
|
|
|
std::error_code DataAggregator::parseBranchEvents() {
|
2019-01-16 15:43:40 +08:00
|
|
|
outs() << "PERF2BOLT: parse branch events...\n";
|
|
|
|
NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-07 07:00:23 +08:00
|
|
|
TimerGroupDesc, opts::TimeAggregator);
|
2019-01-16 15:43:40 +08:00
|
|
|
|
2021-05-14 01:50:47 +08:00
|
|
|
uint64_t NumTotalSamples = 0;
|
|
|
|
uint64_t NumEntries = 0;
|
|
|
|
uint64_t NumSamples = 0;
|
|
|
|
uint64_t NumSamplesNoLBR = 0;
|
|
|
|
uint64_t NumTraces = 0;
|
|
|
|
bool NeedsSkylakeFix = false;
|
2018-10-03 08:16:26 +08:00
|
|
|
|
2020-05-08 14:00:29 +08:00
|
|
|
while (hasData() && NumTotalSamples < opts::MaxSamples) {
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
++NumTotalSamples;
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
|
|
|
|
if (std::error_code EC = SampleRes.getError()) {
|
2019-04-16 07:42:49 +08:00
|
|
|
if (EC == errc::no_such_process)
|
2019-04-06 08:27:25 +08:00
|
|
|
continue;
|
2017-09-02 09:13:51 +08:00
|
|
|
return EC;
|
2019-04-06 08:27:25 +08:00
|
|
|
}
|
|
|
|
++NumSamples;
|
2017-09-02 09:13:51 +08:00
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
PerfBranchSample &Sample = SampleRes.get();
|
2019-01-23 09:21:45 +08:00
|
|
|
if (opts::WriteAutoFDOData)
|
|
|
|
++BasicSamples[Sample.PC];
|
|
|
|
|
2019-04-06 08:27:25 +08:00
|
|
|
if (Sample.LBR.empty()) {
|
|
|
|
++NumSamplesNoLBR;
|
2017-09-02 09:13:51 +08:00
|
|
|
continue;
|
2019-04-06 08:27:25 +08:00
|
|
|
}
|
2017-09-02 09:13:51 +08:00
|
|
|
|
|
|
|
NumEntries += Sample.LBR.size();
|
2020-08-05 01:59:37 +08:00
|
|
|
if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
|
|
|
|
errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
|
2019-10-18 07:35:57 +08:00
|
|
|
NeedsSkylakeFix = true;
|
|
|
|
}
|
2017-09-02 09:13:51 +08:00
|
|
|
|
2019-06-07 10:38:06 +08:00
|
|
|
// LBRs are stored in reverse execution order. NextPC refers to the next
|
|
|
|
// recorded executed PC.
|
|
|
|
uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
|
2021-05-14 01:50:47 +08:00
|
|
|
uint32_t NumEntry = 0;
|
2021-04-08 15:19:26 +08:00
|
|
|
for (const LBREntry &LBR : Sample.LBR) {
|
2019-10-18 07:35:57 +08:00
|
|
|
++NumEntry;
|
|
|
|
// Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
|
|
|
|
// sometimes record entry 32 as an exact copy of entry 31. This will cause
|
|
|
|
// us to likely record an invalid trace and generate a stale function for
|
|
|
|
// BAT mode (non BAT disassembles the function and is able to ignore this
|
|
|
|
// trace at aggregation time). Drop first 2 entries (last two, in
|
|
|
|
// chronological order)
|
|
|
|
if (NeedsSkylakeFix && NumEntry <= 2)
|
|
|
|
continue;
|
2019-06-07 10:38:06 +08:00
|
|
|
if (NextPC) {
|
2018-10-03 08:16:26 +08:00
|
|
|
// Record fall-through trace.
|
2021-04-08 15:19:26 +08:00
|
|
|
const uint64_t TraceFrom = LBR.To;
|
|
|
|
const uint64_t TraceTo = NextPC;
|
|
|
|
const BinaryFunction *TraceBF =
|
|
|
|
getBinaryFunctionContainingAddress(TraceFrom);
|
2018-10-03 08:16:26 +08:00
|
|
|
if (TraceBF && TraceBF->containsAddress(TraceTo)) {
|
2021-04-08 15:19:26 +08:00
|
|
|
FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
|
2021-12-29 10:29:54 +08:00
|
|
|
if (TraceBF->containsAddress(LBR.From))
|
2021-04-08 15:19:26 +08:00
|
|
|
++Info.InternCount;
|
2021-12-29 10:29:54 +08:00
|
|
|
else
|
2021-04-08 15:19:26 +08:00
|
|
|
++Info.ExternCount;
|
2018-10-03 08:16:26 +08:00
|
|
|
} else {
|
|
|
|
if (TraceBF && getBinaryFunctionContainingAddress(TraceTo)) {
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs()
|
|
|
|
<< "Invalid trace starting in "
|
|
|
|
<< TraceBF->getPrintName() << " @ "
|
|
|
|
<< Twine::utohexstr(TraceFrom - TraceBF->getAddress())
|
|
|
|
<< " and ending @ " << Twine::utohexstr(TraceTo)
|
|
|
|
<< '\n');
|
2018-10-03 08:16:26 +08:00
|
|
|
++NumInvalidTraces;
|
|
|
|
} else {
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs()
|
|
|
|
<< "Out of range trace starting in "
|
2019-04-13 08:33:46 +08:00
|
|
|
<< (TraceBF ? TraceBF->getPrintName() : "None") << " @ "
|
|
|
|
<< Twine::utohexstr(
|
|
|
|
TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
|
|
|
|
<< " and ending in "
|
|
|
|
<< (getBinaryFunctionContainingAddress(TraceTo)
|
|
|
|
? getBinaryFunctionContainingAddress(TraceTo)
|
|
|
|
->getPrintName()
|
|
|
|
: "None")
|
|
|
|
<< " @ "
|
|
|
|
<< Twine::utohexstr(
|
|
|
|
TraceTo -
|
|
|
|
(getBinaryFunctionContainingAddress(TraceTo)
|
|
|
|
? getBinaryFunctionContainingAddress(TraceTo)
|
|
|
|
->getAddress()
|
|
|
|
: 0))
|
|
|
|
<< '\n');
|
2018-10-03 08:16:26 +08:00
|
|
|
++NumLongRangeTraces;
|
|
|
|
}
|
|
|
|
}
|
2017-09-27 05:42:43 +08:00
|
|
|
++NumTraces;
|
|
|
|
}
|
2019-06-07 10:38:06 +08:00
|
|
|
NextPC = LBR.From;
|
2018-10-03 08:16:26 +08:00
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
uint64_t From = LBR.From;
|
2018-10-03 08:16:26 +08:00
|
|
|
if (!getBinaryFunctionContainingAddress(From))
|
|
|
|
From = 0;
|
2021-04-08 15:19:26 +08:00
|
|
|
uint64_t To = LBR.To;
|
2018-10-03 08:16:26 +08:00
|
|
|
if (!getBinaryFunctionContainingAddress(To))
|
|
|
|
To = 0;
|
|
|
|
if (!From && !To)
|
|
|
|
continue;
|
2021-04-08 15:19:26 +08:00
|
|
|
BranchInfo &Info = BranchLBRs[Trace(From, To)];
|
2018-10-03 08:16:26 +08:00
|
|
|
++Info.TakenCount;
|
|
|
|
Info.MispredCount += LBR.Mispred;
|
2017-09-02 09:13:51 +08:00
|
|
|
}
|
|
|
|
}
|
2018-10-03 08:16:26 +08:00
|
|
|
|
2019-01-16 15:43:40 +08:00
|
|
|
for (const auto &LBR : BranchLBRs) {
|
2021-04-08 15:19:26 +08:00
|
|
|
const Trace &Trace = LBR.first;
|
|
|
|
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.From))
|
2019-01-16 15:43:40 +08:00
|
|
|
BF->setHasProfileAvailable();
|
2021-04-08 15:19:26 +08:00
|
|
|
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.To))
|
2019-01-16 15:43:40 +08:00
|
|
|
BF->setHasProfileAvailable();
|
2018-10-03 08:16:26 +08:00
|
|
|
}
|
|
|
|
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
|
|
|
|
OS << " (";
|
|
|
|
if (OS.has_colors()) {
|
2021-12-29 10:29:54 +08:00
|
|
|
if (Percent > T2)
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
OS.changeColor(raw_ostream::RED);
|
2021-12-29 10:29:54 +08:00
|
|
|
else if (Percent > T1)
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
OS.changeColor(raw_ostream::YELLOW);
|
2021-12-29 10:29:54 +08:00
|
|
|
else
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
OS.changeColor(raw_ostream::GREEN);
|
|
|
|
}
|
|
|
|
OS << format("%.1f%%", Percent);
|
|
|
|
if (OS.has_colors())
|
|
|
|
OS.resetColor();
|
|
|
|
OS << ")";
|
|
|
|
};
|
|
|
|
|
2021-12-15 08:52:51 +08:00
|
|
|
outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
|
|
|
|
<< " LBR entries\n";
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
if (NumTotalSamples) {
|
2019-04-06 08:27:25 +08:00
|
|
|
if (NumSamples && NumSamplesNoLBR == NumSamples) {
|
[BOLT] Improve ICP activation policy and hot jt processing
Summary:
Previously, ICP worked with a budget of N targets to convert to
direct calls. As long as the frequency of up to N of the hottest targets
surpassed a given fraction (threshold) of the total frequency, say, 90%,
then the optimization would convert a number of targets (up to N) to
direct calls. Otherwise, it would completely abort processing this call
site. The intent was to convert a given fraction of the indirect call
site frequency to use direct calls instead, but this ends up being a
"all or nothing" strategy.
In this patch we change this to operate with the same strategy seem in
LLVM's ICP, with two thresholds. The idea is that the hottest target of
an indirect call site will be compared against these two thresholds: one
checks its frequency relative to the total frequency of the original
indirect call site, and the other checks its frequency relative to the
remaining, unconverted targets (excluding the hottest targets that were
already converted to direct calls). The remaining threshold is typically
set higher than the total threshold. This allows us more control over
ICP.
I expose two pairs of knobs, one for jump tables and another for
indirect calls.
To improve the promotion of hot jump table indices when we have memory
profile, I also fix a bug that could cause us to promote extra indices
besides the hottest ones as seen in the memory profile. When we have the
memory profile, I reapply the dual threshold checks to the memory
profile which specifies exactly which indices are hot. I then update N,
the number of targets to be promoted, based on this new information, and
update frequency information.
To allow us to work with smaller profiles, I also created an option in
perf2bolt to filter out memory samples outside the statically allocated
area of the binary (heap/stack). This option is on by default.
(cherry picked from FBD15187832)
2019-05-03 03:28:34 +08:00
|
|
|
// Note: we don't know if perf2bolt is being used to parse memory samples
|
|
|
|
// at this point. In this case, it is OK to parse zero LBRs.
|
2019-04-06 08:27:25 +08:00
|
|
|
errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
|
|
|
|
"LBR. Record profile with perf record -j any or run perf2bolt "
|
|
|
|
"in no-LBR mode with -nl (the performance improvement in -nl "
|
|
|
|
"mode may be limited)\n";
|
|
|
|
} else {
|
2021-04-08 15:19:26 +08:00
|
|
|
const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
|
|
|
|
const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
|
2019-04-06 08:27:25 +08:00
|
|
|
outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
|
|
|
|
printColored(outs(), PercentIgnored, 20, 50);
|
|
|
|
outs() << " were ignored\n";
|
2021-12-29 10:29:54 +08:00
|
|
|
if (PercentIgnored > 50.0f)
|
2019-04-06 08:27:25 +08:00
|
|
|
errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
|
|
|
|
"were attributed to the input binary\n";
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
|
2017-09-27 05:42:43 +08:00
|
|
|
<< NumInvalidTraces;
|
2021-05-14 01:50:47 +08:00
|
|
|
float Perc = 0.0f;
|
2017-09-27 05:42:43 +08:00
|
|
|
if (NumTraces > 0) {
|
|
|
|
Perc = NumInvalidTraces * 100.0f / NumTraces;
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
printColored(outs(), Perc, 5, 10);
|
2017-09-27 05:42:43 +08:00
|
|
|
}
|
|
|
|
outs() << "\n";
|
2021-12-29 10:29:54 +08:00
|
|
|
if (Perc > 10.0f)
|
2017-09-27 05:42:43 +08:00
|
|
|
outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
|
|
|
|
"binary is probably not the same binary used during profiling "
|
|
|
|
"collection. The generated data may be ineffective for improving "
|
|
|
|
"performance.\n\n";
|
|
|
|
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
outs() << "PERF2BOLT: out of range traces involving unknown regions: "
|
2017-09-27 05:42:43 +08:00
|
|
|
<< NumLongRangeTraces;
|
2021-12-29 10:29:54 +08:00
|
|
|
if (NumTraces > 0)
|
2017-09-27 05:42:43 +08:00
|
|
|
outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
|
|
|
|
outs() << "\n";
|
2017-09-02 09:13:51 +08:00
|
|
|
|
2019-04-13 08:33:46 +08:00
|
|
|
if (NumColdSamples > 0) {
|
2021-04-08 15:19:26 +08:00
|
|
|
const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
|
2019-04-13 08:33:46 +08:00
|
|
|
outs() << "PERF2BOLT: " << NumColdSamples
|
|
|
|
<< format(" (%.1f%%)", ColdSamples)
|
|
|
|
<< " samples recorded in cold regions of split functions.\n";
|
2021-12-29 10:29:54 +08:00
|
|
|
if (ColdSamples > 5.0f)
|
2019-04-13 08:33:46 +08:00
|
|
|
outs()
|
|
|
|
<< "WARNING: The BOLT-processed binary where samples were collected "
|
|
|
|
"likely used bad data or your service observed a large shift in "
|
|
|
|
"profile. You may want to audit this.\n";
|
|
|
|
}
|
|
|
|
|
2017-09-02 09:13:51 +08:00
|
|
|
return std::error_code();
|
|
|
|
}
|
|
|
|
|
2019-01-16 15:43:40 +08:00
|
|
|
void DataAggregator::processBranchEvents() {
|
|
|
|
outs() << "PERF2BOLT: processing branch events...\n";
|
|
|
|
NamedRegionTimer T("processBranch", "Processing branch events",
|
|
|
|
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
|
|
|
|
|
|
|
|
for (const auto &AggrLBR : FallthroughLBRs) {
|
2021-04-08 15:19:26 +08:00
|
|
|
const Trace &Loc = AggrLBR.first;
|
|
|
|
const FTInfo &Info = AggrLBR.second;
|
2019-01-16 15:43:40 +08:00
|
|
|
LBREntry First{Loc.From, Loc.From, false};
|
|
|
|
LBREntry Second{Loc.To, Loc.To, false};
|
2021-12-29 10:29:54 +08:00
|
|
|
if (Info.InternCount)
|
2019-01-16 15:43:40 +08:00
|
|
|
doTrace(First, Second, Info.InternCount);
|
|
|
|
if (Info.ExternCount) {
|
|
|
|
First.From = 0;
|
|
|
|
doTrace(First, Second, Info.ExternCount);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (const auto &AggrLBR : BranchLBRs) {
|
2021-04-08 15:19:26 +08:00
|
|
|
const Trace &Loc = AggrLBR.first;
|
|
|
|
const BranchInfo &Info = AggrLBR.second;
|
2019-01-16 15:43:40 +08:00
|
|
|
doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-04-14 02:18:46 +08:00
|
|
|
std::error_code DataAggregator::parseBasicEvents() {
|
2019-01-16 15:43:40 +08:00
|
|
|
outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
|
|
|
|
NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName,
|
2018-04-14 02:18:46 +08:00
|
|
|
TimerGroupDesc, opts::TimeAggregator);
|
|
|
|
while (hasData()) {
|
2021-04-08 15:19:26 +08:00
|
|
|
ErrorOr<PerfBasicSample> Sample = parseBasicSample();
|
2019-01-16 15:43:40 +08:00
|
|
|
if (std::error_code EC = Sample.getError())
|
2018-04-14 02:18:46 +08:00
|
|
|
return EC;
|
|
|
|
|
2019-01-16 15:43:40 +08:00
|
|
|
if (!Sample->PC)
|
2018-04-14 02:18:46 +08:00
|
|
|
continue;
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
|
2019-01-16 15:43:40 +08:00
|
|
|
BF->setHasProfileAvailable();
|
|
|
|
|
2019-01-23 09:21:45 +08:00
|
|
|
++BasicSamples[Sample->PC];
|
|
|
|
EventNames.insert(Sample->EventName);
|
2019-01-16 15:43:40 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return std::error_code();
|
|
|
|
}
|
|
|
|
|
|
|
|
void DataAggregator::processBasicEvents() {
|
|
|
|
outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
|
2021-12-15 08:52:51 +08:00
|
|
|
NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
|
|
|
|
TimerGroupDesc, opts::TimeAggregator);
|
2021-05-14 01:50:47 +08:00
|
|
|
uint64_t OutOfRangeSamples = 0;
|
|
|
|
uint64_t NumSamples = 0;
|
2019-01-16 15:43:40 +08:00
|
|
|
for (auto &Sample : BasicSamples) {
|
2021-04-08 15:19:26 +08:00
|
|
|
const uint64_t PC = Sample.first;
|
|
|
|
const uint64_t HitCount = Sample.second;
|
2019-01-23 09:21:45 +08:00
|
|
|
NumSamples += HitCount;
|
2021-04-08 15:19:26 +08:00
|
|
|
BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
|
2018-04-14 02:18:46 +08:00
|
|
|
if (!Func) {
|
2019-01-23 09:21:45 +08:00
|
|
|
OutOfRangeSamples += HitCount;
|
2018-04-14 02:18:46 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2019-01-23 09:21:45 +08:00
|
|
|
doSample(*Func, PC, HitCount);
|
2018-04-14 02:18:46 +08:00
|
|
|
}
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
|
2018-04-14 02:18:46 +08:00
|
|
|
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
|
2018-04-14 02:18:46 +08:00
|
|
|
<< OutOfRangeSamples;
|
2021-05-14 01:50:47 +08:00
|
|
|
float Perc = 0.0f;
|
2018-04-14 02:18:46 +08:00
|
|
|
if (NumSamples > 0) {
|
|
|
|
outs() << " (";
|
|
|
|
Perc = OutOfRangeSamples * 100.0f / NumSamples;
|
|
|
|
if (outs().has_colors()) {
|
2021-12-29 10:29:54 +08:00
|
|
|
if (Perc > 60.0f)
|
2018-04-14 02:18:46 +08:00
|
|
|
outs().changeColor(raw_ostream::RED);
|
2021-12-29 10:29:54 +08:00
|
|
|
else if (Perc > 40.0f)
|
2018-04-14 02:18:46 +08:00
|
|
|
outs().changeColor(raw_ostream::YELLOW);
|
2021-12-29 10:29:54 +08:00
|
|
|
else
|
2018-04-14 02:18:46 +08:00
|
|
|
outs().changeColor(raw_ostream::GREEN);
|
|
|
|
}
|
|
|
|
outs() << format("%.1f%%", Perc);
|
|
|
|
if (outs().has_colors())
|
|
|
|
outs().resetColor();
|
|
|
|
outs() << ")";
|
|
|
|
}
|
|
|
|
outs() << "\n";
|
2021-12-29 10:29:54 +08:00
|
|
|
if (Perc > 80.0f)
|
2018-04-14 02:18:46 +08:00
|
|
|
outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
|
|
|
|
"binary is probably not the same binary used during profiling "
|
|
|
|
"collection. The generated data may be ineffective for improving "
|
|
|
|
"performance.\n\n";
|
|
|
|
}
|
|
|
|
|
2017-10-17 04:09:43 +08:00
|
|
|
std::error_code DataAggregator::parseMemEvents() {
|
2019-01-16 15:43:40 +08:00
|
|
|
outs() << "PERF2BOLT: parsing memory events...\n";
|
|
|
|
NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName,
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-07 07:00:23 +08:00
|
|
|
TimerGroupDesc, opts::TimeAggregator);
|
2017-10-17 04:09:43 +08:00
|
|
|
while (hasData()) {
|
2021-04-08 15:19:26 +08:00
|
|
|
ErrorOr<PerfMemSample> Sample = parseMemSample();
|
2019-01-16 15:43:40 +08:00
|
|
|
if (std::error_code EC = Sample.getError())
|
2017-10-17 04:09:43 +08:00
|
|
|
return EC;
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
|
2019-01-16 15:43:40 +08:00
|
|
|
BF->setHasProfileAvailable();
|
|
|
|
|
|
|
|
MemSamples.emplace_back(std::move(Sample.get()));
|
|
|
|
}
|
|
|
|
|
|
|
|
return std::error_code();
|
|
|
|
}
|
|
|
|
|
|
|
|
void DataAggregator::processMemEvents() {
|
|
|
|
NamedRegionTimer T("ProcessMemEvents", "Processing mem events",
|
|
|
|
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
|
2021-04-08 15:19:26 +08:00
|
|
|
for (const PerfMemSample &Sample : MemSamples) {
|
|
|
|
uint64_t PC = Sample.PC;
|
|
|
|
uint64_t Addr = Sample.Addr;
|
2017-10-17 04:09:43 +08:00
|
|
|
StringRef FuncName;
|
|
|
|
StringRef MemName;
|
|
|
|
|
|
|
|
// Try to resolve symbol for PC
|
2021-04-08 15:19:26 +08:00
|
|
|
BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
|
2020-05-08 14:00:29 +08:00
|
|
|
if (!Func) {
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(if (PC != 0) {
|
2020-05-08 14:00:29 +08:00
|
|
|
dbgs() << "Skipped mem event: 0x" << Twine::utohexstr(PC) << " => 0x"
|
|
|
|
<< Twine::utohexstr(Addr) << "\n";
|
|
|
|
});
|
|
|
|
continue;
|
2017-10-17 04:09:43 +08:00
|
|
|
}
|
|
|
|
|
2020-05-08 14:00:29 +08:00
|
|
|
FuncName = Func->getOneName();
|
|
|
|
PC -= Func->getAddress();
|
|
|
|
|
2017-10-17 04:09:43 +08:00
|
|
|
// Try to resolve symbol for memory load
|
2021-04-08 15:19:26 +08:00
|
|
|
if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) {
|
2020-05-08 14:00:29 +08:00
|
|
|
MemName = BD->getName();
|
|
|
|
Addr -= BD->getAddress();
|
[BOLT] Improve ICP activation policy and hot jt processing
Summary:
Previously, ICP worked with a budget of N targets to convert to
direct calls. As long as the frequency of up to N of the hottest targets
surpassed a given fraction (threshold) of the total frequency, say, 90%,
then the optimization would convert a number of targets (up to N) to
direct calls. Otherwise, it would completely abort processing this call
site. The intent was to convert a given fraction of the indirect call
site frequency to use direct calls instead, but this ends up being a
"all or nothing" strategy.
In this patch we change this to operate with the same strategy seem in
LLVM's ICP, with two thresholds. The idea is that the hottest target of
an indirect call site will be compared against these two thresholds: one
checks its frequency relative to the total frequency of the original
indirect call site, and the other checks its frequency relative to the
remaining, unconverted targets (excluding the hottest targets that were
already converted to direct calls). The remaining threshold is typically
set higher than the total threshold. This allows us more control over
ICP.
I expose two pairs of knobs, one for jump tables and another for
indirect calls.
To improve the promotion of hot jump table indices when we have memory
profile, I also fix a bug that could cause us to promote extra indices
besides the hottest ones as seen in the memory profile. When we have the
memory profile, I reapply the dual threshold checks to the memory
profile which specifies exactly which indices are hot. I then update N,
the number of targets to be promoted, based on this new information, and
update frequency information.
To allow us to work with smaller profiles, I also created an option in
perf2bolt to filter out memory samples outside the statically allocated
area of the binary (heap/stack). This option is on by default.
(cherry picked from FBD15187832)
2019-05-03 03:28:34 +08:00
|
|
|
} else if (opts::FilterMemProfile) {
|
2020-05-08 14:00:29 +08:00
|
|
|
// Filter out heap/stack accesses
|
[BOLT] Improve ICP activation policy and hot jt processing
Summary:
Previously, ICP worked with a budget of N targets to convert to
direct calls. As long as the frequency of up to N of the hottest targets
surpassed a given fraction (threshold) of the total frequency, say, 90%,
then the optimization would convert a number of targets (up to N) to
direct calls. Otherwise, it would completely abort processing this call
site. The intent was to convert a given fraction of the indirect call
site frequency to use direct calls instead, but this ends up being a
"all or nothing" strategy.
In this patch we change this to operate with the same strategy seem in
LLVM's ICP, with two thresholds. The idea is that the hottest target of
an indirect call site will be compared against these two thresholds: one
checks its frequency relative to the total frequency of the original
indirect call site, and the other checks its frequency relative to the
remaining, unconverted targets (excluding the hottest targets that were
already converted to direct calls). The remaining threshold is typically
set higher than the total threshold. This allows us more control over
ICP.
I expose two pairs of knobs, one for jump tables and another for
indirect calls.
To improve the promotion of hot jump table indices when we have memory
profile, I also fix a bug that could cause us to promote extra indices
besides the hottest ones as seen in the memory profile. When we have the
memory profile, I reapply the dual threshold checks to the memory
profile which specifies exactly which indices are hot. I then update N,
the number of targets to be promoted, based on this new information, and
update frequency information.
To allow us to work with smaller profiles, I also created an option in
perf2bolt to filter out memory samples outside the statically allocated
area of the binary (heap/stack). This option is on by default.
(cherry picked from FBD15187832)
2019-05-03 03:28:34 +08:00
|
|
|
continue;
|
2017-10-17 04:09:43 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
const Location FuncLoc(!FuncName.empty(), FuncName, PC);
|
|
|
|
const Location AddrLoc(!MemName.empty(), MemName, Addr);
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
FuncMemData *MemData = &NamesToMemEvents[FuncName];
|
2020-05-08 14:00:29 +08:00
|
|
|
setMemData(*Func, MemData);
|
|
|
|
MemData->update(FuncLoc, AddrLoc);
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n");
|
2017-10-17 04:09:43 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-01-16 15:43:40 +08:00
|
|
|
std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
|
|
|
|
outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
|
|
|
|
NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
|
|
|
|
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
|
2018-07-18 09:31:46 +08:00
|
|
|
while (hasData()) {
|
2021-04-08 15:19:26 +08:00
|
|
|
ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry();
|
2019-01-16 15:43:40 +08:00
|
|
|
if (std::error_code EC = AggrEntry.getError())
|
2018-07-18 09:31:46 +08:00
|
|
|
return EC;
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
if (BinaryFunction *BF =
|
|
|
|
getBinaryFunctionContainingAddress(AggrEntry->From.Offset))
|
2019-01-16 15:43:40 +08:00
|
|
|
BF->setHasProfileAvailable();
|
2021-04-08 15:19:26 +08:00
|
|
|
if (BinaryFunction *BF =
|
|
|
|
getBinaryFunctionContainingAddress(AggrEntry->To.Offset))
|
2019-01-16 15:43:40 +08:00
|
|
|
BF->setHasProfileAvailable();
|
|
|
|
|
|
|
|
AggregatedLBRs.emplace_back(std::move(AggrEntry.get()));
|
|
|
|
}
|
|
|
|
|
|
|
|
return std::error_code();
|
|
|
|
}
|
2018-07-18 09:31:46 +08:00
|
|
|
|
2019-01-16 15:43:40 +08:00
|
|
|
void DataAggregator::processPreAggregated() {
|
|
|
|
outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
|
|
|
|
NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
|
|
|
|
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
|
|
|
|
|
2021-05-14 01:50:47 +08:00
|
|
|
uint64_t NumTraces = 0;
|
2021-04-08 15:19:26 +08:00
|
|
|
for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
|
2018-07-18 09:31:46 +08:00
|
|
|
switch (AggrEntry.EntryType) {
|
|
|
|
case AggregatedLBREntry::BRANCH:
|
|
|
|
doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
|
|
|
|
AggrEntry.Mispreds);
|
|
|
|
break;
|
|
|
|
case AggregatedLBREntry::FT:
|
|
|
|
case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
|
|
|
|
LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
|
|
|
|
? AggrEntry.From.Offset
|
|
|
|
: 0,
|
|
|
|
AggrEntry.From.Offset, false};
|
|
|
|
LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
|
|
|
|
doTrace(First, Second, AggrEntry.Count);
|
2019-04-26 07:34:50 +08:00
|
|
|
NumTraces += AggrEntry.Count;
|
2018-07-18 09:31:46 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-01-16 15:43:40 +08:00
|
|
|
|
|
|
|
outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
|
|
|
|
<< " aggregated LBR entries\n";
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
|
2018-07-18 09:31:46 +08:00
|
|
|
<< NumInvalidTraces;
|
2021-05-14 01:50:47 +08:00
|
|
|
float Perc = 0.0f;
|
2018-07-18 09:31:46 +08:00
|
|
|
if (NumTraces > 0) {
|
|
|
|
outs() << " (";
|
|
|
|
Perc = NumInvalidTraces * 100.0f / NumTraces;
|
|
|
|
if (outs().has_colors()) {
|
2021-12-29 10:29:54 +08:00
|
|
|
if (Perc > 10.0f)
|
2018-07-18 09:31:46 +08:00
|
|
|
outs().changeColor(raw_ostream::RED);
|
2021-12-29 10:29:54 +08:00
|
|
|
else if (Perc > 5.0f)
|
2018-07-18 09:31:46 +08:00
|
|
|
outs().changeColor(raw_ostream::YELLOW);
|
2021-12-29 10:29:54 +08:00
|
|
|
else
|
2018-07-18 09:31:46 +08:00
|
|
|
outs().changeColor(raw_ostream::GREEN);
|
|
|
|
}
|
|
|
|
outs() << format("%.1f%%", Perc);
|
|
|
|
if (outs().has_colors())
|
|
|
|
outs().resetColor();
|
|
|
|
outs() << ")";
|
|
|
|
}
|
|
|
|
outs() << "\n";
|
2021-12-29 10:29:54 +08:00
|
|
|
if (Perc > 10.0f)
|
2018-07-18 09:31:46 +08:00
|
|
|
outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
|
|
|
|
"binary is probably not the same binary used during profiling "
|
|
|
|
"collection. The generated data may be ineffective for improving "
|
|
|
|
"performance.\n\n";
|
|
|
|
|
|
|
|
outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
|
|
|
|
<< NumLongRangeTraces;
|
2021-12-29 10:29:54 +08:00
|
|
|
if (NumTraces > 0)
|
2018-07-18 09:31:46 +08:00
|
|
|
outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
|
|
|
|
outs() << "\n";
|
|
|
|
}
|
|
|
|
|
2021-11-12 10:14:53 +08:00
|
|
|
/// Try to interpret the current line of ParsingBuf as a
/// "PERF_RECORD_COMM exec" event and extract the PID it names.
///
/// \returns the parsed PID on success. Returns an empty Optional when
///   - the buffer contains no newline (an error is reported),
///   - the current line does not contain "PERF_RECORD_COMM exec" (silently), or
///   - the PID field is not a valid decimal integer (an error is reported).
///
/// NOTE(review): nothing in this function advances ParsingBuf; presumably the
/// caller consumes the line afterwards - confirm against the call site.
Optional<int32_t> DataAggregator::parseCommExecEvent() {
  size_t LineEnd = ParsingBuf.find_first_of("\n");
  if (LineEnd == StringRef::npos) {
    reportError("expected rest of line");
    Diag << "Found: " << ParsingBuf << "\n";
    return NoneType();
  }
  StringRef Line = ParsingBuf.substr(0, LineEnd);

  size_t Pos = Line.find("PERF_RECORD_COMM exec");
  if (Pos == StringRef::npos)
    return NoneType();
  Line = Line.drop_front(Pos);

  // Line:
  //  PERF_RECORD_COMM exec: <name>:<pid>/<tid>
  //
  // The PID sits between the last ':' on the line and the '/' following it.
  StringRef PIDStr = Line.rsplit(':').second.split('/').first;
  int32_t PID = 0;
  if (PIDStr.getAsInteger(10, PID)) {
    reportError("expected PID");
    // Keep a space between the offending text and "in" so that the diagnostic
    // stays readable.
    Diag << "Found: " << PIDStr << " in '" << Line << "'\n";
    return NoneType();
  }

  return PID;
}
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
Optional<uint64_t> parsePerfTime(const StringRef TimeStr) {
|
2021-04-08 15:19:26 +08:00
|
|
|
const StringRef SecTimeStr = TimeStr.split('.').first;
|
|
|
|
const StringRef USecTimeStr = TimeStr.split('.').second;
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
uint64_t SecTime;
|
|
|
|
uint64_t USecTime;
|
|
|
|
if (SecTimeStr.getAsInteger(10, SecTime) ||
|
2021-12-29 10:29:54 +08:00
|
|
|
USecTimeStr.getAsInteger(10, USecTime))
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
return NoneType();
|
|
|
|
return SecTime * 1000000ULL + USecTime;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-12-15 08:52:51 +08:00
|
|
|
Optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() {
|
|
|
|
while (checkAndConsumeFS()) {
|
|
|
|
}
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've been handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
size_t LineEnd = ParsingBuf.find_first_of("\n");
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've been handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
if (LineEnd == StringRef::npos) {
|
|
|
|
reportError("expected rest of line");
|
|
|
|
Diag << "Found: " << ParsingBuf << "\n";
|
|
|
|
return NoneType();
|
|
|
|
}
|
|
|
|
StringRef Line = ParsingBuf.substr(0, LineEnd);
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
size_t Pos = Line.find("PERF_RECORD_FORK");
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've been handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
if (Pos == StringRef::npos) {
|
|
|
|
consumeRestOfLine();
|
|
|
|
return NoneType();
|
|
|
|
}
|
|
|
|
|
|
|
|
ForkInfo FI;
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
const StringRef TimeStr =
|
|
|
|
Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
|
|
|
|
if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) {
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've been handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
FI.Time = *TimeRes;
|
|
|
|
}
|
|
|
|
|
|
|
|
Line = Line.drop_front(Pos);
|
|
|
|
|
|
|
|
// Line:
|
|
|
|
// PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
|
2021-04-08 15:19:26 +08:00
|
|
|
const StringRef ChildPIDStr = Line.split('(').second.split(':').first;
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've been handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) {
|
|
|
|
reportError("expected PID");
|
|
|
|
Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n";
|
|
|
|
return NoneType();
|
|
|
|
}
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first;
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've been handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) {
|
|
|
|
reportError("expected PID");
|
|
|
|
Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n";
|
|
|
|
return NoneType();
|
|
|
|
}
|
|
|
|
|
|
|
|
consumeRestOfLine();
|
|
|
|
|
|
|
|
return FI;
|
|
|
|
}
|
|
|
|
|
2018-08-15 04:24:44 +08:00
|
|
|
ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>>
|
|
|
|
DataAggregator::parseMMapEvent() {
|
2021-12-15 08:52:51 +08:00
|
|
|
while (checkAndConsumeFS()) {
|
|
|
|
}
|
2017-09-02 09:13:51 +08:00
|
|
|
|
2018-08-15 04:24:44 +08:00
|
|
|
MMapInfo ParsedInfo;
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
size_t LineEnd = ParsingBuf.find_first_of("\n");
|
2017-09-02 09:13:51 +08:00
|
|
|
if (LineEnd == StringRef::npos) {
|
|
|
|
reportError("expected rest of line");
|
|
|
|
Diag << "Found: " << ParsingBuf << "\n";
|
|
|
|
return make_error_code(llvm::errc::io_error);
|
|
|
|
}
|
|
|
|
StringRef Line = ParsingBuf.substr(0, LineEnd);
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
size_t Pos = Line.find("PERF_RECORD_MMAP2");
|
2018-08-15 04:24:44 +08:00
|
|
|
if (Pos == StringRef::npos) {
|
2018-07-14 06:26:41 +08:00
|
|
|
consumeRestOfLine();
|
2018-08-15 04:24:44 +08:00
|
|
|
return std::make_pair(StringRef(), ParsedInfo);
|
2018-07-14 06:26:41 +08:00
|
|
|
}
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
|
|
|
|
// Line:
|
|
|
|
// {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
const StringRef TimeStr =
|
|
|
|
Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
|
2021-12-29 10:29:54 +08:00
|
|
|
if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr))
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
ParsedInfo.Time = *TimeRes;
|
|
|
|
|
2018-08-15 04:24:44 +08:00
|
|
|
Line = Line.drop_front(Pos);
|
2018-07-14 06:26:41 +08:00
|
|
|
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
// Line:
|
|
|
|
// PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
StringRef FileName = Line.rsplit(FieldSeparator).second;
|
2018-08-15 04:24:44 +08:00
|
|
|
if (FileName.startswith("//") || FileName.startswith("[")) {
|
|
|
|
consumeRestOfLine();
|
2018-08-15 04:24:44 +08:00
|
|
|
return std::make_pair(StringRef(), ParsedInfo);
|
2018-08-15 04:24:44 +08:00
|
|
|
}
|
|
|
|
FileName = sys::path::filename(FileName);
|
2018-07-14 06:26:41 +08:00
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first;
|
2018-08-15 04:24:44 +08:00
|
|
|
if (PIDStr.getAsInteger(10, ParsedInfo.PID)) {
|
2018-07-14 06:26:41 +08:00
|
|
|
reportError("expected PID");
|
2018-08-15 04:24:44 +08:00
|
|
|
Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
|
|
|
|
return make_error_code(llvm::errc::io_error);
|
|
|
|
}
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
const StringRef BaseAddressStr = Line.split('[').second.split('(').first;
|
2022-04-14 10:39:39 +08:00
|
|
|
if (BaseAddressStr.getAsInteger(0, ParsedInfo.MMapAddress)) {
|
2018-08-15 04:24:44 +08:00
|
|
|
reportError("expected base address");
|
|
|
|
Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n";
|
|
|
|
return make_error_code(llvm::errc::io_error);
|
|
|
|
}
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
const StringRef SizeStr = Line.split('(').second.split(')').first;
|
2018-08-15 04:24:44 +08:00
|
|
|
if (SizeStr.getAsInteger(0, ParsedInfo.Size)) {
|
|
|
|
reportError("expected mmaped size");
|
|
|
|
Diag << "Found: " << SizeStr << "in '" << Line << "'\n";
|
2018-07-14 06:26:41 +08:00
|
|
|
return make_error_code(llvm::errc::io_error);
|
2017-09-02 09:13:51 +08:00
|
|
|
}
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
const StringRef OffsetStr =
|
2019-11-15 08:07:11 +08:00
|
|
|
Line.split('@').second.ltrim().split(FieldSeparator).first;
|
|
|
|
if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) {
|
|
|
|
reportError("expected mmaped page-aligned offset");
|
|
|
|
Diag << "Found: " << OffsetStr << "in '" << Line << "'\n";
|
|
|
|
return make_error_code(llvm::errc::io_error);
|
|
|
|
}
|
|
|
|
|
2017-09-02 09:13:51 +08:00
|
|
|
consumeRestOfLine();
|
2018-07-14 06:26:41 +08:00
|
|
|
|
2018-08-15 04:24:44 +08:00
|
|
|
return std::make_pair(FileName, ParsedInfo);
|
2017-09-02 09:13:51 +08:00
|
|
|
}
|
|
|
|
|
2018-08-15 04:24:44 +08:00
|
|
|
std::error_code DataAggregator::parseMMapEvents() {
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
|
2018-08-15 04:24:44 +08:00
|
|
|
NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName,
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-07 07:00:23 +08:00
|
|
|
TimerGroupDesc, opts::TimeAggregator);
|
2017-09-02 09:13:51 +08:00
|
|
|
|
2018-08-15 04:24:44 +08:00
|
|
|
std::multimap<StringRef, MMapInfo> GlobalMMapInfo;
|
2017-09-02 09:13:51 +08:00
|
|
|
while (hasData()) {
|
2021-04-08 15:19:26 +08:00
|
|
|
ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent();
|
2018-08-15 04:24:44 +08:00
|
|
|
if (std::error_code EC = FileMMapInfoRes.getError())
|
2017-09-02 09:13:51 +08:00
|
|
|
return EC;
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get();
|
2018-08-15 04:24:44 +08:00
|
|
|
if (FileMMapInfo.second.PID == -1)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// Consider only the first mapping of the file for any given PID
|
|
|
|
bool PIDExists = false;
|
|
|
|
auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first);
|
|
|
|
for (auto MI = Range.first; MI != Range.second; ++MI) {
|
|
|
|
if (MI->second.PID == FileMMapInfo.second.PID) {
|
|
|
|
PIDExists = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (PIDExists)
|
2017-09-02 09:13:51 +08:00
|
|
|
continue;
|
2018-07-14 06:26:41 +08:00
|
|
|
|
2018-08-15 04:24:44 +08:00
|
|
|
GlobalMMapInfo.insert(FileMMapInfo);
|
2018-07-14 06:26:41 +08:00
|
|
|
}
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
LLVM_DEBUG({
|
2018-08-15 04:24:44 +08:00
|
|
|
dbgs() << "FileName -> mmap info:\n";
|
2021-12-29 10:29:54 +08:00
|
|
|
for (const std::pair<const StringRef, MMapInfo> &Pair : GlobalMMapInfo)
|
2018-08-15 04:24:44 +08:00
|
|
|
dbgs() << " " << Pair.first << " : " << Pair.second.PID << " [0x"
|
2022-04-14 10:39:39 +08:00
|
|
|
<< Twine::utohexstr(Pair.second.MMapAddress) << ", "
|
2019-11-15 08:07:11 +08:00
|
|
|
<< Twine::utohexstr(Pair.second.Size) << " @ "
|
|
|
|
<< Twine::utohexstr(Pair.second.Offset) << "]\n";
|
2021-04-08 15:19:26 +08:00
|
|
|
});
|
2018-07-14 06:26:41 +08:00
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
StringRef NameToUse = llvm::sys::path::filename(BC->getFilename());
|
2018-08-15 04:24:44 +08:00
|
|
|
if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) {
|
2018-07-14 06:26:41 +08:00
|
|
|
errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
|
|
|
|
<< "\" for profile matching\n";
|
2018-08-31 05:51:10 +08:00
|
|
|
NameToUse = BuildIDBinaryName;
|
2018-07-14 06:26:41 +08:00
|
|
|
}
|
2017-09-02 09:13:51 +08:00
|
|
|
|
2018-08-15 04:24:44 +08:00
|
|
|
auto Range = GlobalMMapInfo.equal_range(NameToUse);
|
2018-07-14 06:26:41 +08:00
|
|
|
for (auto I = Range.first; I != Range.second; ++I) {
|
2022-04-14 10:39:39 +08:00
|
|
|
MMapInfo &MMapInfo = I->second;
|
|
|
|
if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) {
|
2019-12-18 03:17:31 +08:00
|
|
|
// Check that the binary mapping matches one of the segments.
|
2021-05-01 06:02:29 +08:00
|
|
|
bool MatchFound = false;
|
2020-06-27 07:52:07 +08:00
|
|
|
for (auto &KV : BC->SegmentMapInfo) {
|
2021-04-08 15:19:26 +08:00
|
|
|
SegmentInfo &SegInfo = KV.second;
|
2022-04-14 10:39:39 +08:00
|
|
|
// The mapping is page-aligned and hence the MMapAddress could be
|
2021-05-01 06:02:29 +08:00
|
|
|
// different from the segment start address. We cannot know the page
|
|
|
|
// size of the mapping, but we know it should not exceed the segment
|
|
|
|
// alignment value. Hence we are performing an approximate check.
|
2022-04-14 10:39:39 +08:00
|
|
|
if (SegInfo.Address >= MMapInfo.MMapAddress &&
|
|
|
|
SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment) {
|
2019-12-18 03:17:31 +08:00
|
|
|
MatchFound = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!MatchFound) {
|
|
|
|
errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
|
2022-04-14 10:39:39 +08:00
|
|
|
<< " at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << '\n';
|
2019-12-18 03:17:31 +08:00
|
|
|
continue;
|
|
|
|
}
|
2019-12-06 08:52:15 +08:00
|
|
|
}
|
|
|
|
|
2022-04-14 10:39:39 +08:00
|
|
|
// Set base address for shared objects.
|
|
|
|
if (!BC->HasFixedLoadAddress) {
|
|
|
|
Optional<uint64_t> BaseAddress =
|
|
|
|
BC->getBaseAddressForMapping(MMapInfo.MMapAddress, MMapInfo.Offset);
|
|
|
|
if (!BaseAddress) {
|
|
|
|
errs() << "PERF2BOLT-WARNING: unable to find base address of the "
|
|
|
|
"binary when memory mapped at 0x"
|
|
|
|
<< Twine::utohexstr(MMapInfo.MMapAddress)
|
|
|
|
<< " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset)
|
|
|
|
<< ". Ignoring profile data for this mapping\n";
|
|
|
|
continue;
|
|
|
|
} else {
|
|
|
|
MMapInfo.BaseAddress = *BaseAddress;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-05-01 06:02:29 +08:00
|
|
|
BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
|
2017-09-02 09:13:51 +08:00
|
|
|
}
|
2018-07-14 06:26:41 +08:00
|
|
|
|
2018-08-15 04:24:44 +08:00
|
|
|
if (BinaryMMapInfo.empty()) {
|
2018-05-17 04:31:13 +08:00
|
|
|
if (errs().has_colors())
|
2018-07-14 06:26:41 +08:00
|
|
|
errs().changeColor(raw_ostream::RED);
|
|
|
|
errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
|
2020-05-08 14:00:29 +08:00
|
|
|
<< BC->getFilename() << "\".";
|
2018-08-15 04:24:44 +08:00
|
|
|
if (!GlobalMMapInfo.empty()) {
|
2018-07-14 06:26:41 +08:00
|
|
|
errs() << " Profile for the following binary name(s) is available:\n";
|
2018-08-15 04:24:44 +08:00
|
|
|
for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE;
|
2021-12-29 10:29:54 +08:00
|
|
|
I = GlobalMMapInfo.upper_bound(I->first))
|
2018-07-14 06:26:41 +08:00
|
|
|
errs() << " " << I->first << '\n';
|
|
|
|
errs() << "Please rename the input binary.\n";
|
|
|
|
} else {
|
|
|
|
errs() << " Failed to extract any binary name from a profile.\n";
|
|
|
|
}
|
2018-05-17 04:31:13 +08:00
|
|
|
if (errs().has_colors())
|
|
|
|
errs().resetColor();
|
2018-08-15 04:24:44 +08:00
|
|
|
|
2018-07-14 06:26:41 +08:00
|
|
|
exit(1);
|
2018-05-17 04:31:13 +08:00
|
|
|
}
|
2017-09-02 09:13:51 +08:00
|
|
|
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
return std::error_code();
|
|
|
|
}
|
|
|
|
|
|
|
|
std::error_code DataAggregator::parseTaskEvents() {
|
|
|
|
outs() << "PERF2BOLT: parsing perf-script task events output\n";
|
|
|
|
NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName,
|
|
|
|
TimerGroupDesc, opts::TimeAggregator);
|
|
|
|
|
|
|
|
while (hasData()) {
|
2021-11-12 10:14:53 +08:00
|
|
|
if (Optional<int32_t> CommInfo = parseCommExecEvent()) {
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've been handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
// Remove forked child that ran execve
|
|
|
|
auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo);
|
2021-12-29 10:29:54 +08:00
|
|
|
if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked)
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've been handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
BinaryMMapInfo.erase(MMapInfoIter);
|
|
|
|
consumeRestOfLine();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
Optional<ForkInfo> ForkInfo = parseForkEvent();
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've being handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
if (!ForkInfo)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (ForkInfo->ParentPID == ForkInfo->ChildPID)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (ForkInfo->Time == 0) {
|
|
|
|
// Process was forked and mmaped before perf ran. In this case the child
|
|
|
|
// should have its own mmap entry unless it was execve'd.
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID);
|
|
|
|
if (MMapInfoIter == BinaryMMapInfo.end())
|
|
|
|
continue;
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
MMapInfo MMapInfo = MMapInfoIter->second;
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've been handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
MMapInfo.PID = ForkInfo->ChildPID;
|
|
|
|
MMapInfo.Forked = true;
|
|
|
|
BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
|
|
|
|
}
|
|
|
|
|
|
|
|
outs() << "PERF2BOLT: input binary is associated with "
|
2018-08-15 04:24:44 +08:00
|
|
|
<< BinaryMMapInfo.size() << " PID(s)\n";
|
2018-08-15 04:24:44 +08:00
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
LLVM_DEBUG({
|
2021-12-29 10:29:54 +08:00
|
|
|
for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've been handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
outs() << " " << MMI.second.PID << (MMI.second.Forked ? " (forked)" : "")
|
2022-04-14 10:39:39 +08:00
|
|
|
<< ": (0x" << Twine::utohexstr(MMI.second.MMapAddress) << ": 0x"
|
2021-12-15 08:52:51 +08:00
|
|
|
<< Twine::utohexstr(MMI.second.Size) << ")\n";
|
2021-04-08 15:19:26 +08:00
|
|
|
});
|
[perf2bolt] Better tracking of process forking
Summary:
Improve tracking of forked processes.
If a process corresponding to the input binary has forked/started
before 'perf record' was initiated, then the full name of the binary
will be recorded in a corresponding MMAP2 event. We've been handling
such cases well so far.
However, if the process was forked after 'perf record' has started, and
execve(2) wasn't called afterwards, then there will be no MMAP2 event
recorded corresponding to the mapping of the main binary (unrelated
MMAP2 events could still be recorded).
To track such cases, we need to parse 'perf script --show-task-events'
command output, and to scan for PERF_RECORD_FORK events, and then add
forked process PIDs to the list associated with the input binary. If
the fork event was followed by an exec event (PERF_RECORD_COMM exec)
of a different binary, then the forked PID should be ignored. If the
exec event was associated with our input binary, then the correct MMAP2
event was recorded and parsed.
To track if the event occurred before or after 'perf record', we parse
event's time. This helps us to differentiate some events. E.g. the exec
event is only registered correctly if it happened after perf recording
has started (otherwise the "exec" part is missing), and thus we only
record forks with non-zero time stamps.
(cherry picked from FBD13250904)
2018-11-22 12:04:00 +08:00
|
|
|
|
2017-09-02 09:13:51 +08:00
|
|
|
return std::error_code();
|
|
|
|
}
|
|
|
|
|
2017-10-07 05:42:46 +08:00
|
|
|
/// Parse a single record consisting of a build-id field followed by a name
/// field, starting at the current parsing position.
///
/// Any leading field separators are skipped first. When both fields parse,
/// the remainder of the line is consumed and the result is returned as a
/// (name, build-id) pair. When either field fails to parse, no value is
/// returned and the rest of the line is left unconsumed.
Optional<std::pair<StringRef, StringRef>>
DataAggregator::parseNameBuildIDPair() {
  // Swallow every leading field separator.
  while (checkAndConsumeFS())
    continue;

  // The build-id comes first in the input...
  ErrorOr<StringRef> ID = parseString(FieldSeparator, true);
  if (!ID)
    return NoneType();

  // ...followed by the name.
  ErrorOr<StringRef> Name = parseString(FieldSeparator, true);
  if (!Name)
    return NoneType();

  consumeRestOfLine();

  // Note the swapped order: callers receive the name first.
  return std::make_pair(Name.get(), ID.get());
}
|
|
|
|
|
2018-05-17 04:31:13 +08:00
|
|
|
/// Search the remaining input for a record whose build-id starts with
/// \p FileBuildID.
///
/// \returns the filename component (as computed by sys::path::filename) of
/// the name paired with the first matching build-id. No value is returned if
/// a record fails to parse or the input runs out before a match is found.
Optional<StringRef>
DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
  while (hasData()) {
    Optional<std::pair<StringRef, StringRef>> Record = parseNameBuildIDPair();

    // A record that cannot be parsed terminates the search.
    if (!Record)
      break;

    const StringRef Name = Record->first;
    const StringRef BuildID = Record->second;
    if (BuildID.startswith(FileBuildID))
      return sys::path::filename(Name);
  }

  return NoneType();
}
|
|
|
|
|
2020-05-08 14:00:29 +08:00
|
|
|
std::error_code
|
|
|
|
DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
|
2017-09-02 09:13:51 +08:00
|
|
|
std::error_code EC;
|
2020-12-02 08:29:39 +08:00
|
|
|
raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
|
2017-09-02 09:13:51 +08:00
|
|
|
if (EC)
|
|
|
|
return EC;
|
|
|
|
|
2017-10-17 04:09:43 +08:00
|
|
|
bool WriteMemLocs = false;
|
|
|
|
|
2021-12-15 08:52:51 +08:00
|
|
|
auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) {
|
2017-10-17 04:09:43 +08:00
|
|
|
if (WriteMemLocs)
|
|
|
|
OutFile << (Loc.IsSymbol ? "4 " : "3 ");
|
|
|
|
else
|
|
|
|
OutFile << (Loc.IsSymbol ? "1 " : "0 ");
|
2021-06-30 00:54:08 +08:00
|
|
|
OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name))
|
|
|
|
<< " " << Twine::utohexstr(Loc.Offset) << FieldSeparator;
|
2017-10-17 04:09:43 +08:00
|
|
|
};
|
|
|
|
|
2021-05-14 01:50:47 +08:00
|
|
|
uint64_t BranchValues = 0;
|
|
|
|
uint64_t MemValues = 0;
|
2017-10-17 04:09:43 +08:00
|
|
|
|
2019-04-13 08:33:46 +08:00
|
|
|
if (BAT)
|
|
|
|
OutFile << "boltedcollection\n";
|
2018-04-14 02:18:46 +08:00
|
|
|
if (opts::BasicAggregation) {
|
|
|
|
OutFile << "no_lbr";
|
2021-12-29 10:29:54 +08:00
|
|
|
for (const StringMapEntry<NoneType> &Entry : EventNames)
|
2018-04-14 02:18:46 +08:00
|
|
|
OutFile << " " << Entry.getKey();
|
|
|
|
OutFile << "\n";
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) {
|
|
|
|
for (const SampleInfo &SI : Func.getValue().Data) {
|
2018-04-14 02:18:46 +08:00
|
|
|
writeLocation(SI.Loc);
|
|
|
|
OutFile << SI.Hits << "\n";
|
|
|
|
++BranchValues;
|
|
|
|
}
|
2017-09-02 09:13:51 +08:00
|
|
|
}
|
2018-04-14 02:18:46 +08:00
|
|
|
} else {
|
2021-04-08 15:19:26 +08:00
|
|
|
for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
|
|
|
|
for (const llvm::bolt::BranchInfo &BI : Func.getValue().Data) {
|
2018-04-14 02:18:46 +08:00
|
|
|
writeLocation(BI.From);
|
|
|
|
writeLocation(BI.To);
|
|
|
|
OutFile << BI.Mispreds << " " << BI.Branches << "\n";
|
|
|
|
++BranchValues;
|
|
|
|
}
|
2021-04-08 15:19:26 +08:00
|
|
|
for (const llvm::bolt::BranchInfo &BI : Func.getValue().EntryData) {
|
2018-04-14 02:18:46 +08:00
|
|
|
// Do not output if source is a known symbol, since this was already
|
|
|
|
// accounted for in the source function
|
|
|
|
if (BI.From.IsSymbol)
|
|
|
|
continue;
|
|
|
|
writeLocation(BI.From);
|
|
|
|
writeLocation(BI.To);
|
|
|
|
OutFile << BI.Mispreds << " " << BI.Branches << "\n";
|
|
|
|
++BranchValues;
|
|
|
|
}
|
2017-10-17 04:09:43 +08:00
|
|
|
}
|
|
|
|
|
2018-04-14 02:18:46 +08:00
|
|
|
WriteMemLocs = true;
|
2021-04-08 15:19:26 +08:00
|
|
|
for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) {
|
|
|
|
for (const MemInfo &MemEvent : Func.getValue().Data) {
|
2018-04-14 02:18:46 +08:00
|
|
|
writeLocation(MemEvent.Offset);
|
|
|
|
writeLocation(MemEvent.Addr);
|
|
|
|
OutFile << MemEvent.Count << "\n";
|
|
|
|
++MemValues;
|
|
|
|
}
|
2017-09-02 09:13:51 +08:00
|
|
|
}
|
|
|
|
}
|
2017-10-17 04:09:43 +08:00
|
|
|
|
2021-12-15 08:52:51 +08:00
|
|
|
outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues
|
|
|
|
<< " memory objects to " << OutputFilename << "\n";
|
2017-09-02 09:13:51 +08:00
|
|
|
|
|
|
|
return std::error_code();
|
|
|
|
}
|
|
|
|
|
2021-12-15 08:52:51 +08:00
|
|
|
/// Dump the aggregator by forwarding to DataReader::dump().
void DataAggregator::dump() const {
  DataReader::dump();
}
|
2017-09-02 09:13:51 +08:00
|
|
|
|
|
|
|
/// Print a single LBR entry to Diag on one line: source and destination
/// addresses in hexadecimal, followed by the misprediction flag.
void DataAggregator::dump(const LBREntry &LBR) const {
  Diag << "From: " << Twine::utohexstr(LBR.From);
  Diag << " To: " << Twine::utohexstr(LBR.To);
  Diag << " Mispred? " << LBR.Mispred;
  Diag << "\n";
}
|
|
|
|
|
2017-10-17 04:09:43 +08:00
|
|
|
void DataAggregator::dump(const PerfBranchSample &Sample) const {
|
2017-09-02 09:13:51 +08:00
|
|
|
Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
|
2021-12-29 10:29:54 +08:00
|
|
|
for (const LBREntry &LBR : Sample.LBR)
|
2017-09-02 09:13:51 +08:00
|
|
|
dump(LBR);
|
|
|
|
}
|
2017-10-17 04:09:43 +08:00
|
|
|
|
|
|
|
/// Print a memory sample to Diag on one line: the sample's PC followed by
/// the accessed address.
void DataAggregator::dump(const PerfMemSample &Sample) const {
  Diag << "Sample mem entries: ";
  Diag << Sample.PC << ": " << Sample.Addr;
  Diag << "\n";
}
|