forked from OSchip/llvm-project
[XRay][compiler-rt] Profiling Mode: Flush logs on exit
Summary: This change adds support for writing out profiles at program exit.
Depends on D48653.
Reviewers: kpw, eizan
Reviewed By: kpw
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D48956
llvm-svn: 336969
This commit is contained in:
parent
00712cb749
commit
5d92d3e5be
|
@ -30,13 +30,11 @@ struct ThreadTrie {
|
|||
tid_t TId;
|
||||
FunctionCallTrie *Trie;
|
||||
};
|
||||
Vector<ThreadTrie> ThreadTries;
|
||||
|
||||
struct ProfileBuffer {
|
||||
void *Data;
|
||||
size_t Size;
|
||||
};
|
||||
Vector<ProfileBuffer> ProfileBuffers;
|
||||
|
||||
struct BlockHeader {
|
||||
u32 BlockSize;
|
||||
|
@ -44,6 +42,10 @@ struct BlockHeader {
|
|||
u64 ThreadId;
|
||||
};
|
||||
|
||||
// These need to be pointers that point to heap/internal-allocator-allocated
|
||||
// objects because these are accessed even at program exit.
|
||||
Vector<ThreadTrie> *ThreadTries = nullptr;
|
||||
Vector<ProfileBuffer> *ProfileBuffers = nullptr;
|
||||
FunctionCallTrie::Allocators *GlobalAllocators = nullptr;
|
||||
|
||||
} // namespace
|
||||
|
@ -57,8 +59,16 @@ void post(const FunctionCallTrie &T, tid_t TId) {
|
|||
new (GlobalAllocators) FunctionCallTrie::Allocators();
|
||||
*GlobalAllocators = FunctionCallTrie::InitAllocatorsCustom(
|
||||
profilingFlags()->global_allocator_max);
|
||||
ThreadTries = reinterpret_cast<Vector<ThreadTrie> *>(
|
||||
InternalAlloc(sizeof(Vector<ThreadTrie>)));
|
||||
new (ThreadTries) Vector<ThreadTrie>();
|
||||
ProfileBuffers = reinterpret_cast<Vector<ProfileBuffer> *>(
|
||||
InternalAlloc(sizeof(Vector<ProfileBuffer>)));
|
||||
new (ProfileBuffers) Vector<ProfileBuffer>();
|
||||
});
|
||||
DCHECK_NE(GlobalAllocators, nullptr);
|
||||
DCHECK_NE(ThreadTries, nullptr);
|
||||
DCHECK_NE(ProfileBuffers, nullptr);
|
||||
|
||||
ThreadTrie *Item = nullptr;
|
||||
{
|
||||
|
@ -66,7 +76,7 @@ void post(const FunctionCallTrie &T, tid_t TId) {
|
|||
if (GlobalAllocators == nullptr)
|
||||
return;
|
||||
|
||||
Item = ThreadTries.PushBack();
|
||||
Item = ThreadTries->PushBack();
|
||||
Item->TId = TId;
|
||||
|
||||
// Here we're using the internal allocator instead of the managed allocator
|
||||
|
@ -188,15 +198,15 @@ void serialize() {
|
|||
SpinMutexLock Lock(&GlobalMutex);
|
||||
|
||||
// Clear out the global ProfileBuffers.
|
||||
for (uptr I = 0; I < ProfileBuffers.Size(); ++I)
|
||||
InternalFree(ProfileBuffers[I].Data);
|
||||
ProfileBuffers.Reset();
|
||||
for (uptr I = 0; I < ProfileBuffers->Size(); ++I)
|
||||
InternalFree((*ProfileBuffers)[I].Data);
|
||||
ProfileBuffers->Reset();
|
||||
|
||||
if (ThreadTries.Size() == 0)
|
||||
if (ThreadTries->Size() == 0)
|
||||
return;
|
||||
|
||||
// Then repopulate the global ProfileBuffers.
|
||||
for (u32 I = 0; I < ThreadTries.Size(); ++I) {
|
||||
for (u32 I = 0; I < ThreadTries->Size(); ++I) {
|
||||
using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType;
|
||||
ProfileRecordAllocator PRAlloc(profilingFlags()->global_allocator_max, 0);
|
||||
ProfileRecord::PathAllocator PathAlloc(
|
||||
|
@ -207,7 +217,7 @@ void serialize() {
|
|||
// use a local allocator and an __xray::Array<...> to store the intermediary
|
||||
// data, then compute the size as we're going along. Then we'll allocate the
|
||||
// contiguous space to contain the thread buffer data.
|
||||
const auto &Trie = *ThreadTries[I].Trie;
|
||||
const auto &Trie = *(*ThreadTries)[I].Trie;
|
||||
if (Trie.getRoots().empty())
|
||||
continue;
|
||||
populateRecords(ProfileRecords, PathAlloc, Trie);
|
||||
|
@ -227,8 +237,8 @@ void serialize() {
|
|||
for (const auto &Record : ProfileRecords)
|
||||
CumulativeSizes += 20 + (4 * Record.Path->size());
|
||||
|
||||
BlockHeader Header{16 + CumulativeSizes, I, ThreadTries[I].TId};
|
||||
auto Buffer = ProfileBuffers.PushBack();
|
||||
BlockHeader Header{16 + CumulativeSizes, I, (*ThreadTries)[I].TId};
|
||||
auto Buffer = ProfileBuffers->PushBack();
|
||||
Buffer->Size = sizeof(Header) + CumulativeSizes;
|
||||
Buffer->Data = InternalAlloc(Buffer->Size, nullptr, 64);
|
||||
DCHECK_NE(Buffer->Data, nullptr);
|
||||
|
@ -244,18 +254,26 @@ void serialize() {
|
|||
|
||||
void reset() {
|
||||
SpinMutexLock Lock(&GlobalMutex);
|
||||
// Clear out the profile buffers that have been serialized.
|
||||
for (uptr I = 0; I < ProfileBuffers.Size(); ++I)
|
||||
InternalFree(ProfileBuffers[I].Data);
|
||||
ProfileBuffers.Reset();
|
||||
|
||||
// Clear out the function call tries per thread.
|
||||
for (uptr I = 0; I < ThreadTries.Size(); ++I) {
|
||||
auto &T = ThreadTries[I];
|
||||
T.Trie->~FunctionCallTrie();
|
||||
InternalFree(T.Trie);
|
||||
if (ProfileBuffers != nullptr) {
|
||||
// Clear out the profile buffers that have been serialized.
|
||||
for (uptr I = 0; I < ProfileBuffers->Size(); ++I)
|
||||
InternalFree((*ProfileBuffers)[I].Data);
|
||||
ProfileBuffers->Reset();
|
||||
InternalFree(ProfileBuffers);
|
||||
ProfileBuffers = nullptr;
|
||||
}
|
||||
|
||||
if (ThreadTries != nullptr) {
|
||||
// Clear out the function call tries per thread.
|
||||
for (uptr I = 0; I < ThreadTries->Size(); ++I) {
|
||||
auto &T = (*ThreadTries)[I];
|
||||
T.Trie->~FunctionCallTrie();
|
||||
InternalFree(T.Trie);
|
||||
}
|
||||
ThreadTries->Reset();
|
||||
InternalFree(ThreadTries);
|
||||
ThreadTries = nullptr;
|
||||
}
|
||||
ThreadTries.Reset();
|
||||
|
||||
// Reset the global allocators.
|
||||
if (GlobalAllocators != nullptr) {
|
||||
|
@ -267,18 +285,29 @@ void reset() {
|
|||
InternalAlloc(sizeof(FunctionCallTrie::Allocators)));
|
||||
new (GlobalAllocators) FunctionCallTrie::Allocators();
|
||||
*GlobalAllocators = FunctionCallTrie::InitAllocators();
|
||||
ThreadTries = reinterpret_cast<Vector<ThreadTrie> *>(
|
||||
InternalAlloc(sizeof(Vector<ThreadTrie>)));
|
||||
new (ThreadTries) Vector<ThreadTrie>();
|
||||
ProfileBuffers = reinterpret_cast<Vector<ProfileBuffer> *>(
|
||||
InternalAlloc(sizeof(Vector<ProfileBuffer>)));
|
||||
new (ProfileBuffers) Vector<ProfileBuffer>();
|
||||
}
|
||||
|
||||
XRayBuffer nextBuffer(XRayBuffer B) {
|
||||
SpinMutexLock Lock(&GlobalMutex);
|
||||
if (B.Data == nullptr && ProfileBuffers.Size())
|
||||
return {ProfileBuffers[0].Data, ProfileBuffers[0].Size};
|
||||
|
||||
if (ProfileBuffers == nullptr || ProfileBuffers->Size() == 0)
|
||||
return {nullptr, 0};
|
||||
|
||||
if (B.Data == nullptr)
|
||||
return {(*ProfileBuffers)[0].Data, (*ProfileBuffers)[0].Size};
|
||||
|
||||
BlockHeader Header;
|
||||
internal_memcpy(&Header, B.Data, sizeof(BlockHeader));
|
||||
auto NextBlock = Header.BlockNum + 1;
|
||||
if (NextBlock < ProfileBuffers.Size())
|
||||
return {ProfileBuffers[NextBlock].Data, ProfileBuffers[NextBlock].Size};
|
||||
if (NextBlock < ProfileBuffers->Size())
|
||||
return {(*ProfileBuffers)[NextBlock].Data,
|
||||
(*ProfileBuffers)[NextBlock].Size};
|
||||
return {nullptr, 0};
|
||||
}
|
||||
|
||||
|
|
|
@ -277,7 +277,7 @@ profilingLoggingInit(size_t BufferSize, size_t BufferMax, void *Options,
|
|||
// We need to reset the profile data collection implementation now.
|
||||
profileCollectorService::reset();
|
||||
|
||||
// We need to set up the at-thread-exit handler.
|
||||
// We need to set up the exit handlers.
|
||||
static pthread_once_t Once = PTHREAD_ONCE_INIT;
|
||||
pthread_once(&Once, +[] {
|
||||
pthread_key_create(&ProfilingKey, +[](void *P) {
|
||||
|
@ -288,6 +288,19 @@ profilingLoggingInit(size_t BufferSize, size_t BufferMax, void *Options,
|
|||
|
||||
postCurrentThreadFCT(TLD);
|
||||
});
|
||||
|
||||
// We also need to set up an exit handler, so that we can get the profile
|
||||
// information at exit time. We use the C API to do this, to not rely on C++
|
||||
// ABI functions for registering exit handlers.
|
||||
Atexit(+[] {
|
||||
// Finalize and flush.
|
||||
if (profilingFinalize() != XRAY_LOG_FINALIZED)
|
||||
return;
|
||||
if (profilingFlush() != XRAY_LOG_FLUSHED)
|
||||
return;
|
||||
if (Verbosity())
|
||||
Report("XRay Profile flushed at exit.");
|
||||
});
|
||||
});
|
||||
|
||||
__xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
|
||||
|
@ -321,13 +334,16 @@ bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT {
|
|||
profilingFlush,
|
||||
};
|
||||
auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);
|
||||
if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
|
||||
Verbosity())
|
||||
Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
|
||||
"%d\n",
|
||||
RegistrationResult);
|
||||
if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
|
||||
if (Verbosity())
|
||||
Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
|
||||
"%d\n",
|
||||
RegistrationResult);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))
|
||||
__xray_set_log_impl(Impl);
|
||||
__xray_log_select_mode("xray_profiling");
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ XRAY_FLAG(uptr, global_allocator_max, 2 << 24,
|
|||
"Maximum size of the global allocator for profile storage.")
|
||||
XRAY_FLAG(uptr, stack_allocator_max, 2 << 24,
|
||||
"Maximum size of the traversal stack allocator.")
|
||||
XRAY_FLAG(int, grace_period_ms, 100,
|
||||
XRAY_FLAG(int, grace_period_ms, 1,
|
||||
"Profile collection will wait this much time in milliseconds before "
|
||||
"resetting the global state. This gives a chance to threads to "
|
||||
"notice that the profiler has been finalized and clean up.")
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
// RUN: XRAY_PROFILING_OPTIONS=no_flush=1 %run %t
|
||||
// RUN: XRAY_OPTIONS=verbosity=1 %run %t
|
||||
// RUN: PROFILES=`ls xray-log.profiling-multi-* | wc -l`
|
||||
// RUN: [ $PROFILES -eq 1 ]
|
||||
// RUN: [ $PROFILES -ge 1 ]
|
||||
// RUN: rm -f xray-log.profiling-multi-*
|
||||
//
|
||||
// REQUIRES: x86_64-target-arch
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
// RUN: XRAY_PROFILING_OPTIONS=no_flush=true %run %t
|
||||
// RUN: XRAY_OPTIONS=verbosity=1 %run %t
|
||||
// RUN: PROFILES=`ls xray-log.profiling-single-* | wc -l`
|
||||
// RUN: [ $PROFILES -eq 2 ]
|
||||
// RUN: [ $PROFILES -ge 2 ]
|
||||
// RUN: rm -f xray-log.profiling-single-*
|
||||
//
|
||||
// REQUIRES: x86_64-target-arch
|
||||
|
|
Loading…
Reference in New Issue