//===-- xray_profiling.cpp --------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This is the implementation of a profiling handler.
//
//===----------------------------------------------------------------------===//
#include <memory>
#include <time.h>
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_flags.h"
#include "xray/xray_interface.h"
#include "xray/xray_log_interface.h"
#include "xray_buffer_queue.h"
#include "xray_flags.h"
#include "xray_profile_collector.h"
#include "xray_profiling_flags.h"
#include "xray_recursion_guard.h"
#include "xray_tsc.h"
#include "xray_utils.h"
#include <pthread.h>
namespace __xray {
namespace {
static atomic_sint32_t ProfilerLogFlushStatus = {
XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
static atomic_sint32_t ProfilerLogStatus = {
XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
static SpinMutex ProfilerOptionsMutex;
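
// Per-thread profiler state. Each field holds one of three values: 0 (not
// initialized), 1 (a sentinel marking initialization in progress), or a
// pointer to the lazily-constructed thread-local object.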
struct ProfilingData {
atomic_uintptr_t Allocators;
atomic_uintptr_t FCT;
};
static pthread_key_t ProfilingKey;
// We use a global buffer queue, which is initialized once at profiling
// initialization time and reset when profiling is "done".
static std::aligned_storage<sizeof(BufferQueue), alignof(BufferQueue)>::type
BufferQueueStorage;
static BufferQueue *BQ = nullptr;
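
// Backing storage for this thread's buffers, allocators, and function call
// trie; the objects themselves are constructed lazily in getThreadLocalData().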
thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers;
thread_local std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
alignof(FunctionCallTrie::Allocators)>::type
AllocatorsStorage;
thread_local std::aligned_storage<sizeof(FunctionCallTrie),
alignof(FunctionCallTrie)>::type
FunctionCallTrieStorage;
thread_local ProfilingData TLD{{0}, {0}};
thread_local atomic_uint8_t ReentranceGuard{0};
// We use a separate guard to ensure that if this thread is already cleaning
// up, any signal handlers running on it do not attempt to clean up or
// initialize the thread-local data.
thread_local atomic_uint8_t TLDInitGuard{0};
// We also use a separate latch to signal that the thread is exiting, so that
// non-essential work (e.g. recording events) can be skipped.
thread_local atomic_uint8_t ThreadExitingLatch{0};
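
// Lazily initializes and returns this thread's profiling data, or returns
// nullptr if the data cannot be used (we are re-entering via a signal
// handler, the thread is exiting, or no buffers are available).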
static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT {
thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT {
pthread_setspecific(ProfilingKey, &TLD);
return false;
}();
(void)ThreadOnce;
RecursionGuard TLDInit(TLDInitGuard);
if (!TLDInit)
return nullptr;
if (atomic_load_relaxed(&ThreadExitingLatch))
return nullptr;
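  // Claim initialization of the allocators by swinging the value from 0 to
  // the sentinel 1; the winner constructs the allocators and publishes the
  // real pointer.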
uptr Allocators = 0;
if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1,
memory_order_acq_rel)) {
bool Success = false;
auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
if (!Success)
atomic_store(&TLD.Allocators, 0, memory_order_release);
});
// Acquire a set of buffers for this thread.
if (BQ == nullptr)
return nullptr;
if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok)
return nullptr;
auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
if (!Success)
BQ->releaseBuffer(ThreadBuffers.NodeBuffer);
});
if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok)
return nullptr;
auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
if (!Success)
BQ->releaseBuffer(ThreadBuffers.RootsBuffer);
});
if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) !=
BufferQueue::ErrorCode::Ok)
return nullptr;
auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
if (!Success)
BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer);
});
if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) !=
BufferQueue::ErrorCode::Ok)
return nullptr;
Success = true;
new (&AllocatorsStorage) FunctionCallTrie::Allocators(
FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers));
Allocators = reinterpret_cast<uptr>(
reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage));
atomic_store(&TLD.Allocators, Allocators, memory_order_release);
}
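  // If the sentinel is still in place, initialization is already in progress
  // higher up this thread's stack (e.g. we interrupted it from a signal
  // handler); bail out.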
if (Allocators == 1)
return nullptr;
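  // The trie itself follows the same protocol: 0 -> 1 (in progress) ->
  // pointer.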
uptr FCT = 0;
if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1, memory_order_acq_rel)) {
new (&FunctionCallTrieStorage)
FunctionCallTrie(*reinterpret_cast<FunctionCallTrie::Allocators *>(
atomic_load_relaxed(&TLD.Allocators)));
FCT = reinterpret_cast<uptr>(
reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage));
atomic_store(&TLD.FCT, FCT, memory_order_release);
}
if (FCT == 1)
return nullptr;
return &TLD;
}
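
// Destroys this thread's function call trie and allocators in place, but
// only if they still point at the thread-local storage above.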
static void cleanupTLD() XRAY_NEVER_INSTRUMENT {
auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel);
if (FCT == reinterpret_cast<uptr>(reinterpret_cast<FunctionCallTrie *>(
&FunctionCallTrieStorage)))
reinterpret_cast<FunctionCallTrie *>(FCT)->~FunctionCallTrie();
auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel);
if (Allocators ==
reinterpret_cast<uptr>(
reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators)->~Allocators();
}
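
// Hands this thread's function call trie, allocators, and buffers over to
// the profile collector service, resetting the thread-local state so that
// cleanupTLD() does not tear the objects down a second time.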
static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT {
RecursionGuard TLDInit(TLDInitGuard);
if (!TLDInit)
return;
uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel);
if (P != reinterpret_cast<uptr>(
reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage)))
return;
auto FCT = reinterpret_cast<FunctionCallTrie *>(P);
DCHECK_NE(FCT, nullptr);
uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel);
if (A !=
reinterpret_cast<uptr>(
reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
return;
auto Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>(A);
DCHECK_NE(Allocators, nullptr);
// Always move the data into the profile collector.
profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators),
std::move(ThreadBuffers), GetTid());
// Re-initialize the ThreadBuffers object to a known "default" state.
ThreadBuffers = FunctionCallTrie::Allocators::Buffers{};
}
} // namespace
const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
#ifdef XRAY_PROFILER_DEFAULT_OPTIONS
return SANITIZER_STRINGIFY(XRAY_PROFILER_DEFAULT_OPTIONS);
#else
return "";
#endif
}
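
// Writes the collected profiles out through the log writer. Flushing only
// proceeds once profiling has been finalized, and only one flush may be in
// progress at a time.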
XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
if (atomic_load(&ProfilerLogStatus, memory_order_acquire) !=
XRayLogInitStatus::XRAY_LOG_FINALIZED) {
if (Verbosity())
Report("Not flushing profiles, profiling not been finalized.\n");
return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
}
RecursionGuard SignalGuard(ReentranceGuard);
if (!SignalGuard) {
if (Verbosity())
Report("Cannot finalize properly inside a signal handler!\n");
atomic_store(&ProfilerLogFlushStatus,
XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
memory_order_release);
return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
}
s32 Previous = atomic_exchange(&ProfilerLogFlushStatus,
XRayLogFlushStatus::XRAY_LOG_FLUSHING,
memory_order_acq_rel);
if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
if (Verbosity())
Report("Not flushing profiles, implementation still flushing.\n");
return XRayLogFlushStatus::XRAY_LOG_FLUSHING;
}
// At this point, we'll create the file that will contain the profile, but
// only if the options say so.
if (!profilingFlags()->no_flush) {
// First check whether we have data in the profile collector service
// before we try and write anything down.
XRayBuffer B = profileCollectorService::nextBuffer({nullptr, 0});
if (B.Data == nullptr) {
if (Verbosity())
Report("profiling: No data to flush.\n");
} else {
LogWriter *LW = LogWriter::Open();
if (LW == nullptr) {
if (Verbosity())
Report("profiling: Failed to flush to file, dropping data.\n");
} else {
// Now for each of the buffers, write out the profile data as we would
// see it in memory, verbatim.
while (B.Data != nullptr && B.Size != 0) {
LW->WriteAll(reinterpret_cast<const char *>(B.Data),
reinterpret_cast<const char *>(B.Data) + B.Size);
B = profileCollectorService::nextBuffer(B);
}
}
LogWriter::Close(LW);
}
}
profileCollectorService::reset();
atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
memory_order_release);
atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
memory_order_release);
return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
}
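
// The entry/exit handler: records the event in this thread's function call
// trie, unless the profiler is uninitialized, finalizing, or we re-entered
// the handler.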
void profilingHandleArg0(int32_t FuncId,
XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
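  // Read the TSC and CPU id before anything else, so the timestamp sits as
  // close as possible to the instrumented function's entry or exit.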
unsigned char CPU;
auto TSC = readTSC(CPU);
RecursionGuard G(ReentranceGuard);
if (!G)
return;
auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire);
if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED ||
Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING))
return;
if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED ||
Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) {
postCurrentThreadFCT(TLD);
return;
}
auto T = getThreadLocalData();
if (T == nullptr)
return;
auto FCT = reinterpret_cast<FunctionCallTrie *>(atomic_load_relaxed(&T->FCT));
switch (Entry) {
case XRayEntryType::ENTRY:
case XRayEntryType::LOG_ARGS_ENTRY:
FCT->enterFunction(FuncId, TSC, CPU);
break;
case XRayEntryType::EXIT:
case XRayEntryType::TAIL:
FCT->exitFunction(FuncId, TSC, CPU);
break;
default:
// FIXME: Handle bugs.
break;
}
}
void profilingHandleArg1(int32_t FuncId, XRayEntryType Entry,
uint64_t) XRAY_NEVER_INSTRUMENT {
return profilingHandleArg0(FuncId, Entry);
}
XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {
s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
XRayLogInitStatus::XRAY_LOG_FINALIZING,
memory_order_release)) {
if (Verbosity())
Report("Cannot finalize profile, the profiling is not initialized.\n");
return static_cast<XRayLogInitStatus>(CurrentStatus);
}
  // Mark, then finalize, the current generation of buffers. Threads currently
  // holding buffers may keep using them; the last reference performs the
  // memory cleanup.
DCHECK_NE(BQ, nullptr);
BQ->finalize();
// Wait a grace period to allow threads to see that we're finalizing.
SleepForMillis(profilingFlags()->grace_period_ms);
// If we for some reason are entering this function from an instrumented
// handler, we bail out.
RecursionGuard G(ReentranceGuard);
if (!G)
return static_cast<XRayLogInitStatus>(CurrentStatus);
// Post the current thread's data if we have any.
postCurrentThreadFCT(TLD);
// Then we force serialize the log data.
profileCollectorService::serialize();
atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
memory_order_release);
return XRayLogInitStatus::XRAY_LOG_FINALIZED;
}
XRayLogInitStatus
profilingLoggingInit(size_t, size_t, void *Options,
size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
RecursionGuard G(ReentranceGuard);
if (!G)
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
XRayLogInitStatus::XRAY_LOG_INITIALIZING,
memory_order_acq_rel)) {
if (Verbosity())
Report("Cannot initialize already initialised profiling "
"implementation.\n");
return static_cast<XRayLogInitStatus>(CurrentStatus);
}
{
SpinMutexLock Lock(&ProfilerOptionsMutex);
FlagParser ConfigParser;
ProfilerFlags Flags;
Flags.setDefaults();
registerProfilerFlags(&ConfigParser, &Flags);
ConfigParser.ParseString(profilingCompilerDefinedFlags());
const char *Env = GetEnv("XRAY_PROFILING_OPTIONS");
if (Env == nullptr)
Env = "";
ConfigParser.ParseString(Env);
// Then parse the configuration string provided.
ConfigParser.ParseString(static_cast<const char *>(Options));
if (Verbosity())
ReportUnrecognizedFlags();
*profilingFlags() = Flags;
}
// We need to reset the profile data collection implementation now.
profileCollectorService::reset();
// Then also reset the buffer queue implementation.
if (BQ == nullptr) {
bool Success = false;
new (&BufferQueueStorage)
BufferQueue(profilingFlags()->per_thread_allocator_max,
profilingFlags()->buffers_max, Success);
if (!Success) {
if (Verbosity())
Report("Failed to initialize preallocated memory buffers!");
atomic_store(&ProfilerLogStatus,
XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
memory_order_release);
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
}
    // If we've succeeded, set the global pointer to the initialized storage.
BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
} else {
BQ->finalize();
auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max,
profilingFlags()->buffers_max);
if (InitStatus != BufferQueue::ErrorCode::Ok) {
if (Verbosity())
Report("Failed to initialize preallocated memory buffers; error: %s",
BufferQueue::getErrorString(InitStatus));
atomic_store(&ProfilerLogStatus,
XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
memory_order_release);
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
}
DCHECK(!BQ->finalizing());
}
// We need to set up the exit handlers.
static pthread_once_t Once = PTHREAD_ONCE_INIT;
pthread_once(
&Once, +[] {
pthread_key_create(
&ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT {
if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
return;
if (P == nullptr)
return;
auto T = reinterpret_cast<ProfilingData *>(P);
if (atomic_load_relaxed(&T->Allocators) == 0)
return;
{
// If we're somehow executing this while inside a
// non-reentrant-friendly context, we skip attempting to post
// the current thread's data.
RecursionGuard G(ReentranceGuard);
if (!G)
return;
postCurrentThreadFCT(*T);
}
});
        // We also need to set up an exit handler, so that we can get the
        // profile information at exit time. We use the C API so that we do
        // not rely on C++ ABI functions for registering exit handlers.
Atexit(+[]() XRAY_NEVER_INSTRUMENT {
if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
return;
auto Cleanup =
at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); });
// Finalize and flush.
if (profilingFinalize() != XRAY_LOG_FINALIZED ||
profilingFlush() != XRAY_LOG_FLUSHED)
return;
if (Verbosity())
Report("XRay Profile flushed at exit.");
});
});
__xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
__xray_set_handler(profilingHandleArg0);
__xray_set_handler_arg1(profilingHandleArg1);
atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
memory_order_release);
if (Verbosity())
Report("XRay Profiling init successful.\n");
return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
}
bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT {
// Set up the flag defaults from the static defaults and the
// compiler-provided defaults.
{
SpinMutexLock Lock(&ProfilerOptionsMutex);
auto *F = profilingFlags();
F->setDefaults();
FlagParser ProfilingParser;
registerProfilerFlags(&ProfilingParser, F);
ProfilingParser.ParseString(profilingCompilerDefinedFlags());
}
XRayLogImpl Impl{
profilingLoggingInit,
profilingFinalize,
profilingHandleArg0,
profilingFlush,
};
auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);
if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
if (Verbosity())
Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
"%d\n",
RegistrationResult);
return false;
}
  if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))
    __xray_log_select_mode("xray-profiling");
return true;
}
} // namespace __xray
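
// Registers the profiling mode at load time through this file's dynamic
// initializer.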
static auto UNUSED Unused = __xray::profilingDynamicInitializer();