[trace][intelpt] Support system-wide tracing [22] - Some final touches

Having a member variable TraceIntelPT * makes it look as if it was
optional. I'm using instead a weak_ptr to indicate that it's not
optional and the object is under the ownership of TraceIntelPT.

Besides that, I've simplified the Perf aux and data buffers copying by
using vector.insert.

I'm also renaming Lookup2 to Lookup. The 2 in the name is confusing.

Differential Revision: https://reviews.llvm.org/D127881
This commit is contained in:
Walter Erquinigo 2022-06-15 10:35:00 -07:00
parent 9f45f23d86
commit ea37cd52d1
9 changed files with 101 additions and 61 deletions

View File

@ -205,15 +205,13 @@ Expected<std::vector<uint8_t>> PerfEvent::GetReadOnlyDataBuffer() {
if (data_head > data_size) {
uint64_t actual_data_head = data_head % data_size;
// The buffer has wrapped
for (uint64_t i = actual_data_head; i < data_size; i++)
output.push_back(data[i]);
for (uint64_t i = 0; i < actual_data_head; i++)
output.push_back(data[i]);
// The buffer has wrapped, so we first the oldest chunk of data
output.insert(output.end(), data.begin() + actual_data_head, data.end());
// And we we read the most recent chunk of data
output.insert(output.end(), data.begin(), data.begin() + actual_data_head);
} else {
for (uint64_t i = 0; i < data_head; i++)
output.push_back(data[i]);
// There's been no wrapping, so we just read linearly
output.insert(output.end(), data.begin(), data.begin() + data_head);
}
if (was_enabled) {
@ -238,7 +236,6 @@ Expected<std::vector<uint8_t>> PerfEvent::GetReadOnlyAuxBuffer() {
ArrayRef<uint8_t> data = GetAuxBuffer();
uint64_t aux_head = mmap_metadata.aux_head;
uint64_t aux_size = mmap_metadata.aux_size;
std::vector<uint8_t> output;
output.reserve(data.size());
@ -254,11 +251,8 @@ Expected<std::vector<uint8_t>> PerfEvent::GetReadOnlyAuxBuffer() {
*
* */
for (uint64_t i = aux_head; i < aux_size; i++)
output.push_back(data[i]);
for (uint64_t i = 0; i < aux_head; i++)
output.push_back(data[i]);
output.insert(output.end(), data.begin() + aux_head, data.end());
output.insert(output.end(), data.begin(), data.begin() + aux_head);
if (was_enabled) {
if (Error err = EnableWithIoctl())

View File

@ -48,21 +48,24 @@ struct IntelPTThreadContinousExecution {
void DecodeSingleTraceForThread(DecodedThread &decoded_thread, TraceIntelPT &trace_intel_pt,
llvm::ArrayRef<uint8_t> buffer);
/// Decode a raw Intel PT trace for a single thread that was collected in a per cpu core basis.
/// Decode a raw Intel PT trace for a single thread that was collected in a per
/// cpu core basis.
///
/// \param[in] decoded_thread
/// All decoded instructions, errors and events will be appended to this object.
/// \param[out] decoded_thread
/// All decoded instructions, errors and events will be appended to this
/// object.
///
/// \param[in] trace_intel_pt
/// The main Trace object that contains all the information related to the trace session.
/// The main Trace object that contains all the information related to the
/// trace session.
///
/// \param[in] buffers
/// A map from cpu core id to raw intel pt buffers.
///
/// \param[in] executions
/// A list of chunks of timed executions of the same given thread. It is used to identify if
/// some executions have missing intel pt data and also to determine in which core a certain
/// part of the execution ocurred.
/// A list of chunks of timed executions of the same given thread. It is used
/// to identify if some executions have missing intel pt data and also to
/// determine in which core a certain part of the execution ocurred.
void DecodeSystemWideTraceForThread(
DecodedThread &decoded_thread, TraceIntelPT &trace_intel_pt,
const llvm::DenseMap<lldb::cpu_id_t, llvm::ArrayRef<uint8_t>> &buffers,

View File

@ -74,23 +74,26 @@ Expected<TraceSP> TraceIntelPT::CreateInstanceForLiveProcess(Process &process) {
return instance;
}
TraceIntelPT::TraceIntelPT(JSONTraceSession &session,
ArrayRef<ProcessSP> traced_processes,
ArrayRef<ThreadPostMortemTraceSP> traced_threads)
: Trace(traced_processes, session.GetCpuIds()),
m_cpu_info(session.cpu_info) {
m_storage.tsc_conversion = session.tsc_perf_zero_conversion;
TraceIntelPTSP TraceIntelPT::GetSharedPtr() {
return std::static_pointer_cast<TraceIntelPT>(shared_from_this());
}
TraceIntelPTSP TraceIntelPT::CreateInstanceForPostmortemTrace(
JSONTraceSession &session, ArrayRef<ProcessSP> traced_processes,
ArrayRef<ThreadPostMortemTraceSP> traced_threads) {
TraceIntelPTSP trace_sp(new TraceIntelPT(session, traced_processes));
trace_sp->m_storage.tsc_conversion = session.tsc_perf_zero_conversion;
if (session.cpus) {
std::vector<cpu_id_t> cpus;
for (const JSONCpu &cpu : *session.cpus) {
SetPostMortemCpuDataFile(cpu.id, IntelPTDataKinds::kIptTrace,
FileSpec(cpu.ipt_trace));
trace_sp->SetPostMortemCpuDataFile(cpu.id, IntelPTDataKinds::kIptTrace,
FileSpec(cpu.ipt_trace));
SetPostMortemCpuDataFile(cpu.id,
IntelPTDataKinds::kPerfContextSwitchTrace,
FileSpec(cpu.context_switch_trace));
trace_sp->SetPostMortemCpuDataFile(
cpu.id, IntelPTDataKinds::kPerfContextSwitchTrace,
FileSpec(cpu.context_switch_trace));
cpus.push_back(cpu.id);
}
@ -99,19 +102,28 @@ TraceIntelPT::TraceIntelPT(JSONTraceSession &session,
for (const JSONThread &thread : process.threads)
tids.push_back(thread.tid);
m_storage.multicpu_decoder.emplace(*this);
trace_sp->m_storage.multicpu_decoder.emplace(trace_sp);
} else {
for (const ThreadPostMortemTraceSP &thread : traced_threads) {
m_storage.thread_decoders.try_emplace(
thread->GetID(), std::make_unique<ThreadDecoder>(thread, *this));
trace_sp->m_storage.thread_decoders.try_emplace(
thread->GetID(), std::make_unique<ThreadDecoder>(thread, *trace_sp));
if (const Optional<FileSpec> &trace_file = thread->GetTraceFile()) {
SetPostMortemThreadDataFile(thread->GetID(),
IntelPTDataKinds::kIptTrace, *trace_file);
trace_sp->SetPostMortemThreadDataFile(
thread->GetID(), IntelPTDataKinds::kIptTrace, *trace_file);
}
}
}
for (const ProcessSP &process_sp : traced_processes)
process_sp->GetTarget().SetTrace(trace_sp);
return trace_sp;
}
TraceIntelPT::TraceIntelPT(JSONTraceSession &session,
ArrayRef<ProcessSP> traced_processes)
: Trace(traced_processes, session.GetCpuIds()),
m_cpu_info(session.cpu_info) {}
DecodedThreadSP TraceIntelPT::Decode(Thread &thread) {
if (const char *error = RefreshLiveProcessState())
return std::make_shared<DecodedThread>(
@ -351,7 +363,7 @@ Error TraceIntelPT::DoRefreshLiveProcessState(TraceGetStateResponse state,
if (!intelpt_state->tsc_perf_zero_conversion)
return createStringError(inconvertibleErrorCode(),
"Missing perf time_zero conversion values");
m_storage.multicpu_decoder.emplace(*this);
m_storage.multicpu_decoder.emplace(GetSharedPtr());
}
if (m_storage.tsc_conversion) {

View File

@ -37,7 +37,7 @@ public:
static void Terminate();
/// Create an instance of this class.
/// Create an instance of this class from a trace bundle.
///
/// \param[in] trace_session_file
/// The contents of the trace session file. See \a Trace::FindPlugin.
@ -158,6 +158,8 @@ public:
/// The timer object for this trace.
TaskTimer &GetTimer();
TraceIntelPTSP GetSharedPtr();
private:
friend class TraceIntelPTSessionFileParser;
@ -174,9 +176,20 @@ private:
/// \param[in] trace_threads
/// The threads traced in the live session. They must belong to the
/// processes mentioned above.
///
/// \return
/// A TraceIntelPT shared pointer instance.
/// \{
static TraceIntelPTSP CreateInstanceForPostmortemTrace(
JSONTraceSession &session,
llvm::ArrayRef<lldb::ProcessSP> traced_processes,
llvm::ArrayRef<lldb::ThreadPostMortemTraceSP> traced_threads);
/// This constructor is used by CreateInstanceForPostmortemTrace to get the
/// instance ready before using shared pointers, which is a limitation of C++.
TraceIntelPT(JSONTraceSession &session,
llvm::ArrayRef<lldb::ProcessSP> traced_processes,
llvm::ArrayRef<lldb::ThreadPostMortemTraceSP> traced_threads);
llvm::ArrayRef<lldb::ProcessSP> traced_processes);
/// \}
/// Constructor for live processes
TraceIntelPT(Process &live_process) : Trace(live_process){};

View File

@ -17,15 +17,20 @@ using namespace lldb_private;
using namespace lldb_private::trace_intel_pt;
using namespace llvm;
TraceIntelPTMultiCpuDecoder::TraceIntelPTMultiCpuDecoder(TraceIntelPT &trace)
: m_trace(&trace) {
for (Process *proc : trace.GetAllProcesses()) {
TraceIntelPTMultiCpuDecoder::TraceIntelPTMultiCpuDecoder(
TraceIntelPTSP trace_sp)
: m_trace_wp(trace_sp) {
for (Process *proc : trace_sp->GetAllProcesses()) {
for (ThreadSP thread_sp : proc->GetThreadList().Threads()) {
m_tids.insert(thread_sp->GetID());
}
}
}
TraceIntelPTSP TraceIntelPTMultiCpuDecoder::GetTrace() {
return m_trace_wp.lock();
}
bool TraceIntelPTMultiCpuDecoder::TracesThread(lldb::tid_t tid) const {
return m_tids.count(tid);
}
@ -41,12 +46,14 @@ DecodedThreadSP TraceIntelPTMultiCpuDecoder::Decode(Thread &thread) {
DecodedThreadSP decoded_thread_sp =
std::make_shared<DecodedThread>(thread.shared_from_this());
Error err = m_trace->OnAllCpusBinaryDataRead(
TraceIntelPTSP trace_sp = GetTrace();
Error err = trace_sp->OnAllCpusBinaryDataRead(
IntelPTDataKinds::kIptTrace,
[&](const DenseMap<cpu_id_t, ArrayRef<uint8_t>> &buffers) -> Error {
auto it = m_continuous_executions_per_thread->find(thread.GetID());
if (it != m_continuous_executions_per_thread->end())
DecodeSystemWideTraceForThread(*decoded_thread_sp, *m_trace, buffers,
DecodeSystemWideTraceForThread(*decoded_thread_sp, *trace_sp, buffers,
it->second);
return Error::success();
@ -81,9 +88,10 @@ Expected<DenseMap<lldb::tid_t, std::vector<IntelPTThreadContinousExecution>>>
TraceIntelPTMultiCpuDecoder::DoCorrelateContextSwitchesAndIntelPtTraces() {
DenseMap<lldb::tid_t, std::vector<IntelPTThreadContinousExecution>>
continuous_executions_per_thread;
TraceIntelPTSP trace_sp = GetTrace();
Optional<LinuxPerfZeroTscConversion> conv_opt =
m_trace->GetPerfZeroTscConversion();
trace_sp->GetPerfZeroTscConversion();
if (!conv_opt)
return createStringError(
inconvertibleErrorCode(),
@ -91,9 +99,9 @@ TraceIntelPTMultiCpuDecoder::DoCorrelateContextSwitchesAndIntelPtTraces() {
LinuxPerfZeroTscConversion tsc_conversion = *conv_opt;
for (cpu_id_t cpu_id : m_trace->GetTracedCpus()) {
for (cpu_id_t cpu_id : trace_sp->GetTracedCpus()) {
Expected<std::vector<IntelPTThreadSubtrace>> intel_pt_subtraces =
GetIntelPTSubtracesForCpu(*m_trace, cpu_id);
GetIntelPTSubtracesForCpu(*trace_sp, cpu_id);
if (!intel_pt_subtraces)
return intel_pt_subtraces.takeError();
@ -116,7 +124,7 @@ TraceIntelPTMultiCpuDecoder::DoCorrelateContextSwitchesAndIntelPtTraces() {
continuous_executions_per_thread[thread_execution.tid].push_back(
execution);
};
Error err = m_trace->OnCpuBinaryDataRead(
Error err = trace_sp->OnCpuBinaryDataRead(
cpu_id, IntelPTDataKinds::kPerfContextSwitchTrace,
[&](ArrayRef<uint8_t> data) -> Error {
Expected<std::vector<ThreadContinuousExecution>> executions =
@ -145,7 +153,7 @@ Error TraceIntelPTMultiCpuDecoder::CorrelateContextSwitchesAndIntelPtTraces() {
if (m_continuous_executions_per_thread)
return Error::success();
Error err = m_trace->GetTimer().ForGlobal().TimeTask<Error>(
Error err = GetTrace()->GetTimer().ForGlobal().TimeTask<Error>(
"Context switch and Intel PT traces correlation", [&]() -> Error {
if (auto correlation = DoCorrelateContextSwitchesAndIntelPtTraces()) {
m_continuous_executions_per_thread.emplace(std::move(*correlation));

View File

@ -12,6 +12,7 @@
#include "LibiptDecoder.h"
#include "PerfContextSwitchDecoder.h"
#include "ThreadDecoder.h"
#include "forward-declarations.h"
namespace lldb_private {
namespace trace_intel_pt {
@ -31,7 +32,7 @@ class TraceIntelPTMultiCpuDecoder {
public:
/// \param[in] TraceIntelPT
/// The trace object to be decoded
TraceIntelPTMultiCpuDecoder(TraceIntelPT &trace);
TraceIntelPTMultiCpuDecoder(TraceIntelPTSP trace_sp);
/// \return
/// A \a DecodedThread for the \p thread by decoding its instructions on all
@ -67,7 +68,9 @@ private:
llvm::DenseMap<lldb::tid_t, std::vector<IntelPTThreadContinousExecution>>>
DoCorrelateContextSwitchesAndIntelPtTraces();
TraceIntelPT *m_trace;
TraceIntelPTSP GetTrace();
std::weak_ptr<TraceIntelPT> m_trace_wp;
std::set<lldb::tid_t> m_tids;
llvm::Optional<
llvm::DenseMap<lldb::tid_t, std::vector<IntelPTThreadContinousExecution>>>

View File

@ -284,7 +284,8 @@ Expected<TraceSP> TraceIntelPTSessionFileParser::CreateTraceIntelPTInstance(
parsed_process.threads.end());
}
TraceSP trace_instance(new TraceIntelPT(session, processes, threads));
TraceSP trace_instance = TraceIntelPT::CreateInstanceForPostmortemTrace(
session, processes, threads);
for (const ParsedProcess &parsed_process : parsed_processes)
parsed_process.target_sp->SetTrace(trace_instance);

View File

@ -9,12 +9,16 @@
#ifndef LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_FORWARD_DECLARATIONS_H
#define LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_FORWARD_DECLARATIONS_H
#include <memory>
namespace lldb_private {
namespace trace_intel_pt {
class TraceIntelPT;
class ThreadDecoder;
using TraceIntelPTSP = std::shared_ptr<TraceIntelPT>;
} // namespace trace_intel_pt
} // namespace lldb_private
#endif // LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_FORWARD_DECLARATIONS_H

View File

@ -63,16 +63,18 @@ static V *LookupAsPtr(DenseMap<K, V> &map, K k) {
return &it->second;
}
/// Similar to the methods above but it looks for an item in a map of maps.
template <typename K1, typename K2, typename V>
static Optional<V> Lookup2(DenseMap<K1, DenseMap<K2, V>> &map, K1 k1, K2 k2) {
static Optional<V> Lookup(DenseMap<K1, DenseMap<K2, V>> &map, K1 k1, K2 k2) {
auto it = map.find(k1);
if (it == map.end())
return None;
return Lookup(it->second, k2);
}
/// Similar to the methods above but it looks for an item in a map of maps.
template <typename K1, typename K2, typename V>
static V *Lookup2AsPtr(DenseMap<K1, DenseMap<K2, V>> &map, K1 k1, K2 k2) {
static V *LookupAsPtr(DenseMap<K1, DenseMap<K2, V>> &map, K1 k1, K2 k2) {
auto it = map.find(k1);
if (it == map.end())
return nullptr;
@ -159,13 +161,13 @@ Expected<std::string> Trace::GetLiveProcessState() {
Optional<uint64_t> Trace::GetLiveThreadBinaryDataSize(lldb::tid_t tid,
llvm::StringRef kind) {
Storage &storage = GetUpdatedStorage();
return Lookup2(storage.live_thread_data, tid, ConstString(kind));
return Lookup(storage.live_thread_data, tid, ConstString(kind));
}
Optional<uint64_t> Trace::GetLiveCpuBinaryDataSize(lldb::cpu_id_t cpu_id,
llvm::StringRef kind) {
Storage &storage = GetUpdatedStorage();
return Lookup2(storage.live_cpu_data_sizes, cpu_id, ConstString(kind));
return Lookup(storage.live_cpu_data_sizes, cpu_id, ConstString(kind));
}
Optional<uint64_t> Trace::GetLiveProcessBinaryDataSize(llvm::StringRef kind) {
@ -345,7 +347,7 @@ llvm::Expected<FileSpec>
Trace::GetPostMortemThreadDataFile(lldb::tid_t tid, llvm::StringRef kind) {
Storage &storage = GetUpdatedStorage();
if (Optional<FileSpec> file =
Lookup2(storage.postmortem_thread_data, tid, ConstString(kind)))
Lookup(storage.postmortem_thread_data, tid, ConstString(kind)))
return *file;
else
return createStringError(
@ -358,7 +360,7 @@ llvm::Expected<FileSpec> Trace::GetPostMortemCpuDataFile(lldb::cpu_id_t cpu_id,
llvm::StringRef kind) {
Storage &storage = GetUpdatedStorage();
if (Optional<FileSpec> file =
Lookup2(storage.postmortem_cpu_data, cpu_id, ConstString(kind)))
Lookup(storage.postmortem_cpu_data, cpu_id, ConstString(kind)))
return *file;
else
return createStringError(
@ -393,7 +395,7 @@ llvm::Error Trace::OnLiveCpuBinaryDataRead(lldb::cpu_id_t cpu_id,
OnBinaryDataReadCallback callback) {
Storage &storage = GetUpdatedStorage();
if (std::vector<uint8_t> *cpu_data =
Lookup2AsPtr(storage.live_cpu_data, cpu_id, ConstString(kind)))
LookupAsPtr(storage.live_cpu_data, cpu_id, ConstString(kind)))
return callback(*cpu_data);
Expected<std::vector<uint8_t>> data = GetLiveCpuBinaryData(cpu_id, kind);