[trace][intel pt] Add a cgroup filter

It turns out that cgroup filtering is relatively trivial and works
really nicely. This diff adds automatic cgroup filtering when in
per-cpu mode, unless a new --disable-cgroup-filtering flag is passed in
the start command. At least on Meta machines, all processes are spawned
inside a cgroup by default, which comes in super handy, because per cpu
tracing is now much more precise.

A manual test gave me this result

- Without filtering:
    Total number of trace items: 36083
    Total number of continuous executions found: 229
    Number of continuous executions for this thread: 2
    Total number of PSB blocks found: 98
    Number of PSB blocks for this thread 2
    Total number of unattributed PSB blocks found: 38

- With filtering:
    Total number of trace items: 87756
    Total number of continuous executions found: 123
    Number of continuous executions for this thread: 2
    Total number of PSB blocks found: 10
    Number of PSB blocks for this thread 3
    Total number of unattributed PSB blocks found: 2

Filtering gives us great results. The number of instructions collected
more than doubles (probably because we have less noise in the trace), and
we have far fewer unattributed PSB blocks and unrelated PSBs in
general. The ones that are unrelated probably belong to other processes
in the same cgroup.

Differential Revision: https://reviews.llvm.org/D129257
This commit is contained in:
Gaurav Gaur 2022-06-29 13:09:12 -07:00 committed by Walter Erquinigo
parent 4a843d9282
commit d30fd5c3a1
15 changed files with 163 additions and 28 deletions

View File

@ -50,6 +50,10 @@ struct TraceIntelPTStartRequest : TraceStartRequest {
/// Whether to have a trace buffer per thread or per cpu.
llvm::Optional<bool> per_cpu_tracing;
/// Disable the cgroup filtering that is automatically applied in per cpu
/// mode.
llvm::Optional<bool> disable_cgroup_filtering;
bool IsPerCpuTracing() const;
};
@ -107,6 +111,7 @@ struct LinuxPerfZeroTscConversion {
struct TraceIntelPTGetStateResponse : TraceGetStateResponse {
/// The TSC to wall time conversion if it exists, otherwise \b nullptr.
llvm::Optional<LinuxPerfZeroTscConversion> tsc_perf_zero_conversion;
/// Whether the trace session is using cgroup filtering. Defaults to
/// \b false.
bool using_cgroup_filtering = false;
};
bool fromJSON(const llvm::json::Value &value,

View File

@ -21,6 +21,7 @@
#include <algorithm>
#include <cstddef>
#include <fcntl.h>
#include <fstream>
#include <linux/perf_event.h>
#include <sstream>
@ -65,6 +66,39 @@ Error IntelPTCollector::TraceStop(const TraceStopRequest &request) {
}
}
/// Find and open the cgroup directory of the given process.
///
/// The cgroup path is taken from the first line of /proc/<pid>/cgroup that
/// starts with "0:" and is resolved under /sys/fs/cgroup. Once a descriptor
/// has been opened successfully it is cached in a function-local static and
/// returned by every later call, regardless of the \p pid passed to it.
///
/// \param[in] pid
///     The process whose cgroup is looked up.
///
/// \return
///     some file descriptor in /sys/fs/ associated with the cgroup of the given
///     pid, or \a llvm::None if the pid is not part of a cgroup.
static Optional<int> GetCGroupFileDescriptor(lldb::pid_t pid) {
  static Optional<int> fd;
  if (fd)
    return fd;

  std::ifstream ifile;
  ifile.open(formatv("/proc/{0}/cgroup", pid));
  if (!ifile)
    return None;

  std::string line;
  while (std::getline(ifile, line)) {
    // Only lines beginning with "0:" are considered.
    if (line.rfind("0:", 0) != 0)
      continue;

    // A matching line without any '/' has no path component. Calling substr()
    // with npos would be out of range, so report "no cgroup" instead.
    const size_t slice_pos = line.find('/');
    if (slice_pos == std::string::npos)
      return None;
    std::string slice = line.substr(slice_pos);

    std::string cgroup_file = formatv("/sys/fs/cgroup/{0}", slice);
    // This cgroup should exist for the duration of the target, and the
    // descriptor is cached above for reuse, so we don't need to invoke close
    // ourselves.
    int maybe_fd = open(cgroup_file.c_str(), O_RDONLY);
    if (maybe_fd != -1) {
      fd = maybe_fd;
      return fd;
    }
  }
  return None;
}
Error IntelPTCollector::TraceStart(const TraceIntelPTStartRequest &request) {
if (request.IsProcessTracing()) {
if (m_process_trace_up) {
@ -83,14 +117,19 @@ Error IntelPTCollector::TraceStart(const TraceIntelPTStartRequest &request) {
if (!tsc_conversion)
return tsc_conversion.takeError();
// We force the enabledment of TSCs, which is needed for correlating the
// We force the enablement of TSCs, which is needed for correlating the
// cpu traces.
TraceIntelPTStartRequest effective_request = request;
effective_request.enable_tsc = true;
// We try to use cgroup filtering whenever possible
Optional<int> cgroup_fd;
if (!request.disable_cgroup_filtering.getValueOr(false))
cgroup_fd = GetCGroupFileDescriptor(m_process.GetID());
if (Expected<IntelPTProcessTraceUP> trace =
IntelPTMultiCoreTrace::StartOnAllCores(effective_request,
m_process)) {
m_process, cgroup_fd)) {
m_process_trace_up = std::move(*trace);
return Error::success();
} else {

View File

@ -35,7 +35,8 @@ static Error IncludePerfEventParanoidMessageInError(Error &&error) {
Expected<std::unique_ptr<IntelPTMultiCoreTrace>>
IntelPTMultiCoreTrace::StartOnAllCores(const TraceIntelPTStartRequest &request,
NativeProcessProtocol &process) {
NativeProcessProtocol &process,
Optional<int> cgroup_fd) {
Expected<ArrayRef<cpu_id_t>> cpu_ids = GetAvailableLogicalCoreIDs();
if (!cpu_ids)
return cpu_ids.takeError();
@ -52,7 +53,7 @@ IntelPTMultiCoreTrace::StartOnAllCores(const TraceIntelPTStartRequest &request,
for (cpu_id_t cpu_id : *cpu_ids) {
Expected<IntelPTSingleBufferTrace> core_trace =
IntelPTSingleBufferTrace::Start(request, /*tid=*/None, cpu_id,
/*disabled=*/true);
/*disabled=*/true, cgroup_fd);
if (!core_trace)
return IncludePerfEventParanoidMessageInError(core_trace.takeError());
@ -68,7 +69,7 @@ IntelPTMultiCoreTrace::StartOnAllCores(const TraceIntelPTStartRequest &request,
}
return std::unique_ptr<IntelPTMultiCoreTrace>(
new IntelPTMultiCoreTrace(std::move(traces), process));
new IntelPTMultiCoreTrace(std::move(traces), process, (bool)cgroup_fd));
}
void IntelPTMultiCoreTrace::ForEachCore(
@ -106,6 +107,7 @@ void IntelPTMultiCoreTrace::ProcessWillResume() {
TraceIntelPTGetStateResponse IntelPTMultiCoreTrace::GetState() {
TraceIntelPTGetStateResponse state;
state.using_cgroup_filtering = m_using_cgroup_filtering;
for (NativeThreadProtocol &thread : m_process.Threads())
state.traced_threads.push_back(

View File

@ -35,12 +35,18 @@ public:
/// \param[in] process
/// The process being debugged.
///
/// \param[in] cgroup_fd
/// A file descriptor in /sys/fs associated with the cgroup of the process to
/// trace. If not \a llvm::None, then the trace session will use cgroup
/// filtering.
///
/// \return
/// An \a IntelPTMultiCoreTrace instance if tracing was successful, or
/// an \a llvm::Error otherwise.
static llvm::Expected<std::unique_ptr<IntelPTMultiCoreTrace>>
StartOnAllCores(const TraceIntelPTStartRequest &request,
NativeProcessProtocol &process);
NativeProcessProtocol &process,
llvm::Optional<int> cgroup_fd = llvm::None);
/// Execute the provided callback on each core that is being traced.
///
@ -90,8 +96,9 @@ private:
llvm::DenseMap<lldb::cpu_id_t,
std::pair<IntelPTSingleBufferTrace, ContextSwitchTrace>>
&&traces_per_core,
NativeProcessProtocol &process)
: m_traces_per_core(std::move(traces_per_core)), m_process(process) {}
NativeProcessProtocol &process, bool using_cgroup_filtering)
: m_traces_per_core(std::move(traces_per_core)), m_process(process),
m_using_cgroup_filtering(using_cgroup_filtering) {}
llvm::DenseMap<lldb::cpu_id_t,
std::pair<IntelPTSingleBufferTrace, ContextSwitchTrace>>
@ -99,6 +106,7 @@ private:
/// The target process.
NativeProcessProtocol &m_process;
bool m_using_cgroup_filtering;
};
} // namespace process_linux

View File

@ -231,10 +231,9 @@ Expected<std::vector<uint8_t>> IntelPTSingleBufferTrace::GetIptTrace() {
return m_perf_event.GetReadOnlyAuxBuffer();
}
Expected<IntelPTSingleBufferTrace>
IntelPTSingleBufferTrace::Start(const TraceIntelPTStartRequest &request,
Optional<lldb::tid_t> tid,
Optional<cpu_id_t> cpu_id, bool disabled) {
Expected<IntelPTSingleBufferTrace> IntelPTSingleBufferTrace::Start(
const TraceIntelPTStartRequest &request, Optional<lldb::tid_t> tid,
Optional<cpu_id_t> cpu_id, bool disabled, Optional<int> cgroup_fd) {
#ifndef PERF_ATTR_SIZE_VER5
return createStringError(inconvertibleErrorCode(),
"Intel PT Linux perf event not supported");
@ -265,8 +264,14 @@ IntelPTSingleBufferTrace::Start(const TraceIntelPTStartRequest &request,
LLDB_LOG(log, "Will create intel pt trace buffer of size {0}",
request.ipt_trace_size);
unsigned long flags = 0;
if (cgroup_fd) {
tid = *cgroup_fd;
flags |= PERF_FLAG_PID_CGROUP;
}
if (Expected<PerfEvent> perf_event = PerfEvent::Init(*attr, tid, cpu_id)) {
if (Expected<PerfEvent> perf_event =
PerfEvent::Init(*attr, tid, cpu_id, -1, flags)) {
if (Error mmap_err = perf_event->MmapMetadataAndBuffers(
/*num_data_pages=*/0, aux_buffer_numpages,
/*data_buffer_write=*/true)) {

View File

@ -44,6 +44,11 @@ public:
/// Similarly, if \b false, data is collected right away until \a Pause is
/// invoked.
///
/// \param[in] cgroup_fd
/// A file descriptor in /sys/fs associated with the cgroup of the process
/// to trace. If not \a llvm::None, then the trace session will use cgroup
/// filtering.
///
/// \return
/// A \a IntelPTSingleBufferTrace instance if tracing was successful, or
/// an \a llvm::Error otherwise.
@ -51,7 +56,7 @@ public:
Start(const TraceIntelPTStartRequest &request,
llvm::Optional<lldb::tid_t> tid,
llvm::Optional<lldb::cpu_id_t> cpu_id = llvm::None,
bool disabled = false);
bool disabled = false, llvm::Optional<int> cgroup_fd = llvm::None);
/// \return
/// The bytes requested by a jLLDBTraceGetBinaryData packet that was routed

View File

@ -122,6 +122,10 @@ Status CommandObjectProcessTraceStartIntelPT::CommandOptions::SetOptionValue(
m_per_cpu_tracing = true;
break;
}
case 'd': {
m_disable_cgroup_filtering = true;
break;
}
case 'p': {
int64_t psb_period;
if (option_arg.empty() || option_arg.getAsInteger(0, psb_period) ||
@ -145,6 +149,7 @@ void CommandObjectProcessTraceStartIntelPT::CommandOptions::
m_enable_tsc = kDefaultEnableTscValue;
m_psb_period = kDefaultPsbPeriod;
m_per_cpu_tracing = kDefaultPerCpuTracing;
m_disable_cgroup_filtering = kDefaultDisableCgroupFiltering;
}
llvm::ArrayRef<OptionDefinition>
@ -154,10 +159,10 @@ CommandObjectProcessTraceStartIntelPT::CommandOptions::GetDefinitions() {
bool CommandObjectProcessTraceStartIntelPT::DoExecute(
Args &command, CommandReturnObject &result) {
if (Error err = m_trace.Start(m_options.m_ipt_trace_size,
m_options.m_process_buffer_size_limit,
m_options.m_enable_tsc, m_options.m_psb_period,
m_options.m_per_cpu_tracing))
if (Error err = m_trace.Start(
m_options.m_ipt_trace_size, m_options.m_process_buffer_size_limit,
m_options.m_enable_tsc, m_options.m_psb_period,
m_options.m_per_cpu_tracing, m_options.m_disable_cgroup_filtering))
result.SetError(Status(std::move(err)));
else
result.SetStatus(eReturnStatusSuccessFinishResult);

View File

@ -79,6 +79,7 @@ public:
bool m_enable_tsc;
llvm::Optional<uint64_t> m_psb_period;
bool m_per_cpu_tracing;
bool m_disable_cgroup_filtering;
};
CommandObjectProcessTraceStartIntelPT(TraceIntelPT &trace,

View File

@ -226,6 +226,12 @@ void TraceIntelPT::DumpTraceInfo(Thread &thread, Stream &s, bool verbose) {
s.Format(
" Number of continuous executions for this thread: {0}\n",
storage.multicpu_decoder->GetNumContinuousExecutionsForThread(tid));
s.Format(" Total number of PSB blocks found: {0}\n",
storage.multicpu_decoder->GetTotalPSBBlocksCount());
s.Format(" Number of PSB blocks for this thread {0}\n",
storage.multicpu_decoder->GePSBBlocksCountForThread(tid));
s.Format(" Total number of unattributed PSB blocks found: {0}\n",
storage.multicpu_decoder->GetUnattributedPSBBlocksCount());
}
// Errors
@ -408,17 +414,22 @@ const char *TraceIntelPT::GetStartConfigurationHelp() {
[process tracing only]
- int processBufferSizeLimit (defaults to {4} MiB):
[process tracing only]
- boolean disableCgroupFiltering (default to {5}):
[process tracing only])",
kDefaultIptTraceSize, kDefaultEnableTscValue,
kDefaultPsbPeriod, kDefaultPerCpuTracing,
kDefaultProcessBufferSizeLimit / 1024 / 1024));
kDefaultProcessBufferSizeLimit / 1024 / 1024,
kDefaultDisableCgroupFiltering));
}
return message->c_str();
}
Error TraceIntelPT::Start(uint64_t ipt_trace_size,
uint64_t total_buffer_size_limit, bool enable_tsc,
Optional<uint64_t> psb_period, bool per_cpu_tracing) {
Optional<uint64_t> psb_period, bool per_cpu_tracing,
bool disable_cgroup_filtering) {
TraceIntelPTStartRequest request;
request.ipt_trace_size = ipt_trace_size;
request.process_buffer_size_limit = total_buffer_size_limit;
@ -426,6 +437,7 @@ Error TraceIntelPT::Start(uint64_t ipt_trace_size,
request.psb_period = psb_period;
request.type = GetPluginName().str();
request.per_cpu_tracing = per_cpu_tracing;
request.disable_cgroup_filtering = disable_cgroup_filtering;
return Trace::Start(toJSON(request));
}
@ -435,6 +447,7 @@ Error TraceIntelPT::Start(StructuredData::ObjectSP configuration) {
bool enable_tsc = kDefaultEnableTscValue;
Optional<uint64_t> psb_period = kDefaultPsbPeriod;
bool per_cpu_tracing = kDefaultPerCpuTracing;
bool disable_cgroup_filtering = kDefaultDisableCgroupFiltering;
if (configuration) {
if (StructuredData::Dictionary *dict = configuration->GetAsDictionary()) {
@ -444,6 +457,8 @@ Error TraceIntelPT::Start(StructuredData::ObjectSP configuration) {
dict->GetValueForKeyAsBoolean("enableTsc", enable_tsc);
dict->GetValueForKeyAsInteger("psbPeriod", psb_period);
dict->GetValueForKeyAsBoolean("perCpuTracing", per_cpu_tracing);
dict->GetValueForKeyAsBoolean("disableCgroupFiltering",
disable_cgroup_filtering);
} else {
return createStringError(inconvertibleErrorCode(),
"configuration object is not a dictionary");
@ -451,7 +466,7 @@ Error TraceIntelPT::Start(StructuredData::ObjectSP configuration) {
}
return Start(ipt_trace_size, process_buffer_size_limit, enable_tsc,
psb_period, per_cpu_tracing);
psb_period, per_cpu_tracing, disable_cgroup_filtering);
}
llvm::Error TraceIntelPT::Start(llvm::ArrayRef<lldb::tid_t> tids,

View File

@ -105,12 +105,16 @@ public:
/// This value defines whether to have an intel pt trace buffer per thread
/// or per cpu core.
///
/// \param[in] disable_cgroup_filtering
/// Disable the cgroup filtering that is automatically applied when doing
/// per cpu tracing.
///
/// \return
/// \a llvm::Error::success if the operation was successful, or
/// \a llvm::Error otherwise.
llvm::Error Start(uint64_t ipt_trace_size, uint64_t total_buffer_size_limit,
bool enable_tsc, llvm::Optional<uint64_t> psb_period,
bool m_per_cpu_tracing);
bool m_per_cpu_tracing, bool disable_cgroup_filtering);
/// \copydoc Trace::Start
llvm::Error Start(StructuredData::ObjectSP configuration =

View File

@ -21,6 +21,7 @@ const size_t kDefaultProcessBufferSizeLimit = 5 * 1024 * 1024; // 500MB
const bool kDefaultEnableTscValue = false;
const llvm::Optional<size_t> kDefaultPsbPeriod = llvm::None;
const bool kDefaultPerCpuTracing = false;
const bool kDefaultDisableCgroupFiltering = false;
} // namespace trace_intel_pt
} // namespace lldb_private

View File

@ -110,6 +110,7 @@ TraceIntelPTMultiCpuDecoder::DoCorrelateContextSwitchesAndIntelPtTraces() {
if (!intel_pt_subtraces)
return intel_pt_subtraces.takeError();
m_total_psb_blocks += intel_pt_subtraces->size();
// We'll be iterating through the thread continuous executions and the intel
// pt subtraces sorted by time.
auto it = intel_pt_subtraces->begin();
@ -123,7 +124,7 @@ TraceIntelPTMultiCpuDecoder::DoCorrelateContextSwitchesAndIntelPtTraces() {
if (it->tsc > thread_execution.GetStartTSC()) {
execution.intelpt_subtraces.push_back(*it);
} else {
m_unattributed_intelpt_subtraces++;
m_unattributed_psb_blocks++;
}
}
continuous_executions_per_thread[thread_execution.tid].push_back(
@ -142,6 +143,8 @@ TraceIntelPTMultiCpuDecoder::DoCorrelateContextSwitchesAndIntelPtTraces() {
});
if (err)
return std::move(err);
m_unattributed_psb_blocks += intel_pt_subtraces->end() - it;
}
// We now sort the executions of each thread to have them ready for
// instruction decoding
@ -192,3 +195,22 @@ size_t TraceIntelPTMultiCpuDecoder::GetTotalContinuousExecutionsCount() const {
count += kv.second.size();
return count;
}
/// Sum the number of intel pt subtraces (PSB blocks) over all the continuous
/// executions recorded for \p tid.
///
/// \return
///     The accumulated count, or 0 if no continuous executions are available
///     at all or none is recorded for \p tid.
size_t
TraceIntelPTMultiCpuDecoder::GePSBBlocksCountForThread(lldb::tid_t tid) const {
  if (!m_continuous_executions_per_thread)
    return 0;

  auto it = m_continuous_executions_per_thread->find(tid);
  // A thread without an entry in the map yields end(); dereferencing it would
  // be undefined behavior, so such a thread simply has no PSB blocks.
  if (it == m_continuous_executions_per_thread->end())
    return 0;

  size_t count = 0;
  for (const IntelPTThreadContinousExecution &execution : it->second)
    count += execution.intelpt_subtraces.size();
  return count;
}
size_t TraceIntelPTMultiCpuDecoder::GetUnattributedPSBBlocksCount() const {
  // The counter is stored as uint64_t; spell out the conversion to the
  // size_t return type.
  return static_cast<size_t>(m_unattributed_psb_blocks);
}
size_t TraceIntelPTMultiCpuDecoder::GetTotalPSBBlocksCount() const {
  // The counter is stored as uint64_t; spell out the conversion to the
  // size_t return type.
  return static_cast<size_t>(m_total_psb_blocks);
}

View File

@ -49,10 +49,23 @@ public:
/// The number of continuous executions found for the given \p tid.
size_t GetNumContinuousExecutionsForThread(lldb::tid_t tid) const;
/// \return
/// The number of PSB blocks for a given thread in all cores.
size_t GePSBBlocksCountForThread(lldb::tid_t tid) const;
/// \return
/// The total number of continuous executions found across CPUs.
size_t GetTotalContinuousExecutionsCount() const;
/// \return
/// The number of psb blocks in all cores that couldn't be matched with a
/// thread execution coming from context switch traces.
size_t GetUnattributedPSBBlocksCount() const;
/// \return
/// The total number of PSB blocks in all cores.
size_t GetTotalPSBBlocksCount() const;
private:
/// Traverse the context switch traces and the basic intel pt continuous
/// subtraces and produce a list of continuous executions for each process and
@ -80,7 +93,8 @@ private:
/// This variable will be non-None if a severe error happened during the setup
/// of the decoder and we don't want decoding to be reattempted.
llvm::Optional<std::string> m_setup_error;
uint64_t m_unattributed_intelpt_subtraces;
uint64_t m_unattributed_psb_blocks = 0;
uint64_t m_total_psb_blocks = 0;
};
} // namespace trace_intel_pt

View File

@ -89,4 +89,9 @@ let Command = "process trace start intel pt" in {
"converted to the approximate number of raw trace bytes between PSB "
"packets as: 2 ^ (value + 11), e.g. value 3 means 16KiB between PSB "
"packets. Defaults to 0 if supported.">;
def process_trace_start_intel_pt_disable_cgroup_filtering:
Option<"disable-cgroup-filtering", "d">,
Desc<"Disable the automatic cgroup filtering that is applied if --per-cpu "
"is provided. Cgroup filtering allows collecting intel pt data "
"exclusively of processes of the same cgroup as the target.">;
}

View File

@ -53,7 +53,8 @@ bool fromJSON(const json::Value &value, TraceIntelPTStartRequest &packet,
if (packet.IsProcessTracing()) {
if (!o.map("processBufferSizeLimit", packet.process_buffer_size_limit) ||
!o.map("perCpuTracing", packet.per_cpu_tracing))
!o.map("perCpuTracing", packet.per_cpu_tracing) ||
!o.map("disableCgroupTracing", packet.disable_cgroup_filtering))
return false;
}
return true;
@ -67,6 +68,7 @@ json::Value toJSON(const TraceIntelPTStartRequest &packet) {
obj.try_emplace("psbPeriod", packet.psb_period);
obj.try_emplace("enableTsc", packet.enable_tsc);
obj.try_emplace("perCpuTracing", packet.per_cpu_tracing);
obj.try_emplace("disableCgroupTracing", packet.disable_cgroup_filtering);
return base;
}
@ -108,13 +110,15 @@ bool fromJSON(const json::Value &value, TraceIntelPTGetStateResponse &packet,
json::Path path) {
ObjectMapper o(value, path);
return o && fromJSON(value, (TraceGetStateResponse &)packet, path) &&
o.map("tscPerfZeroConversion", packet.tsc_perf_zero_conversion);
o.map("tscPerfZeroConversion", packet.tsc_perf_zero_conversion) &&
o.map("usingCgroupFiltering", packet.using_cgroup_filtering);
}
json::Value toJSON(const TraceIntelPTGetStateResponse &packet) {
json::Value base = toJSON((const TraceGetStateResponse &)packet);
base.getAsObject()->insert(
{"tscPerfZeroConversion", packet.tsc_perf_zero_conversion});
json::Object &obj = *base.getAsObject();
obj.insert({"tscPerfZeroConversion", packet.tsc_perf_zero_conversion});
obj.insert({"usingCgroupFiltering", packet.using_cgroup_filtering});
return base;
}