2016-12-06 14:24:08 +08:00
|
|
|
//===-- xray_buffer_queue.cc -----------------------------------*- C++ -*-===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This file is a part of XRay, a dynamic runtime instrumentation system.
|
|
|
|
//
|
|
|
|
// Defines the interface for a buffer queue implementation.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "xray_buffer_queue.h"
|
2017-03-22 12:40:32 +08:00
|
|
|
#include "sanitizer_common/sanitizer_common.h"
|
|
|
|
#include "sanitizer_common/sanitizer_libc.h"
|
2018-07-30 13:56:42 +08:00
|
|
|
#include "sanitizer_common/sanitizer_posix.h"
|
2018-06-05 11:46:54 +08:00
|
|
|
#include <memory>
|
2018-07-30 13:56:42 +08:00
|
|
|
#include <sys/mman.h>
|
|
|
|
|
|
|
|
#ifndef MAP_NORESERVE
|
|
|
|
// no-op on NetBSD (at least), unsupported flag on FreeBSD
|
|
|
|
#define MAP_NORESERVE 0
|
|
|
|
#endif
|
2017-03-22 12:40:32 +08:00
|
|
|
|
2016-12-06 14:24:08 +08:00
|
|
|
using namespace __xray;
|
2017-03-22 12:40:32 +08:00
|
|
|
using namespace __sanitizer;
|
2016-12-06 14:24:08 +08:00
|
|
|
|
2018-07-30 13:56:42 +08:00
|
|
|
template <class T> static T *allocRaw(size_t N) {
|
|
|
|
// TODO: Report errors?
|
|
|
|
// We use MAP_NORESERVE on platforms where it's supported to ensure that the
|
|
|
|
// pages we're allocating for XRay never end up in pages that can be swapped
|
|
|
|
// in/out. We're doing this because for FDR mode, we want to ensure that
|
|
|
|
// writes to the buffers stay resident in memory to prevent XRay itself from
|
|
|
|
// causing swapping/thrashing.
|
|
|
|
//
|
|
|
|
// In the case when XRay pages cannot be swapped in/out or there's not enough
|
|
|
|
// RAM to back these pages, we're willing to cause a segmentation fault
|
|
|
|
// instead of introducing latency in the measurement. We assume here that
|
|
|
|
// there are enough pages that are swappable in/out outside of the buffers
|
|
|
|
// being used by FDR mode (which are bounded and configurable anyway) to allow
|
|
|
|
// us to keep using always-resident memory.
|
|
|
|
//
|
|
|
|
// TODO: Make this configurable?
|
|
|
|
void *A = reinterpret_cast<void *>(
|
|
|
|
internal_mmap(NULL, N * sizeof(T), PROT_WRITE | PROT_READ,
|
|
|
|
MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0));
|
|
|
|
return (A == MAP_FAILED) ? nullptr : reinterpret_cast<T *>(A);
|
|
|
|
}
|
|
|
|
|
|
|
|
namespace {
// Size in bytes of one element addressed through a T pointer.
template <class T> struct RawElementSize {
  static constexpr size_t Value = sizeof(T);
};

// Untyped storage has no element size; a count given alongside a void pointer
// is taken to already be a byte count.
template <> struct RawElementSize<void> {
  static constexpr size_t Value = 1;
};
} // namespace

// Unmaps storage for N elements of type T starting at ptr. Does nothing when
// ptr is nullptr.
//
// The length handed to the unmap call is N scaled by the element size, which
// mirrors the N * sizeof(T) bytes that allocRaw<T>(N) maps. Passing the bare
// element count would release only the leading part of the mapping and leak
// the remainder.
template <class T> static void deallocRaw(T *ptr, size_t N) {
  // TODO: Report errors?
  if (ptr != nullptr)
    internal_munmap(ptr, N * RawElementSize<T>::Value);
}
|
|
|
|
|
2018-06-05 11:46:54 +08:00
|
|
|
template <class T> static T *initArray(size_t N) {
|
2018-07-30 13:56:42 +08:00
|
|
|
auto A = allocRaw<T>(N);
|
2018-06-05 11:46:54 +08:00
|
|
|
if (A != nullptr)
|
|
|
|
while (N > 0)
|
|
|
|
new (A + (--N)) T();
|
|
|
|
return A;
|
|
|
|
}
|
|
|
|
|
2017-10-24 09:39:59 +08:00
|
|
|
// Builds a queue of N buffers, each B bytes large.
//
// The member initializers allocate the BufferRep array and the OwnedBuffers
// pointer array (both through initArray) and point Next and First at the first
// BufferRep. The body then maps the data area and the BufferExtents record for
// every slot.
//
// Success is set to true only when every allocation succeeded; on any failure
// it is set to false and construction stops at that point.
BufferQueue::BufferQueue(size_t B, size_t N, bool &Success)
    : BufferSize(B), Buffers(initArray<BufferQueue::BufferRep>(N)),
      BufferCount(N), Finalizing{0}, OwnedBuffers(initArray<void *>(N)),
      Next(Buffers), First(Buffers), LiveBuffers(0) {
  if (Buffers == nullptr) {
    // NOTE(review): OwnedBuffers may have been allocated successfully here and
    // is not released on this path -- confirm the destructor copes with a null
    // Buffers array.
    Success = false;
    return;
  }

  if (OwnedBuffers == nullptr) {
    // Clean up the buffers we've already allocated.
    for (auto Rep = Buffers, End = Buffers + BufferCount; Rep != End; ++Rep)
      Rep->~BufferRep();
    deallocRaw(Buffers, N);
    // NOTE(review): Buffers, Next and First still hold the released address
    // after this point -- confirm nothing (destructor included) touches them
    // when Success is false.
    Success = false;
    return;
  }

  for (size_t i = 0; i < N; ++i) {
    char *Data = allocRaw<char>(BufferSize);
    if (Data == nullptr) {
      Success = false;
      return;
    }

    auto *Extents = allocRaw<BufferExtents>(1);
    if (Extents == nullptr) {
      // Data has not been recorded in the BufferRep or in OwnedBuffers yet, so
      // nothing else can release it; unmap it here to avoid leaking it.
      deallocRaw(Data, BufferSize);
      Success = false;
      return;
    }

    // Publish the freshly mapped storage in slot i and remember the data
    // pointer in OwnedBuffers.
    auto &Buf = Buffers[i].Buff;
    Buf.Data = Data;
    Buf.Size = B;
    Buf.Extents = Extents;
    OwnedBuffers[i] = Data;
  }

  Success = true;
}
|
|
|
|
|
2017-03-22 12:40:32 +08:00
|
|
|
BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) {
|
2018-06-05 14:12:42 +08:00
|
|
|
if (atomic_load(&Finalizing, memory_order_acquire))
|
2017-03-22 12:40:32 +08:00
|
|
|
return ErrorCode::QueueFinalizing;
|
2018-06-05 14:12:42 +08:00
|
|
|
SpinMutexLock Guard(&Mutex);
|
[XRay] Use optimistic logging model for FDR mode
Summary:
Before this change, the FDR mode implementation relied on at thread-exit
handling to return buffers back to the (global) buffer queue. This
introduces issues with the initialisation of the thread_local objects
which, even through the use of pthread_setspecific(...) may eventually
call into an allocation function. Similar to previous changes in this
line, we're finding that there is a huge potential for deadlocks when
initialising these thread-locals when the memory allocation
implementation is also xray-instrumented.
In this change, we limit the call to pthread_setspecific(...) to provide
a non-null value to associate to the key created with
pthread_key_create(...). While this doesn't completely eliminate the
potential for the deadlock(s), it does allow us to still clean up at
thread exit when we need to. The change is that we don't need to do more
work when starting and ending a thread's lifetime. We also have a test
to make sure that we actually can safely recycle the buffers in case we
end up re-using the buffer(s) available from the queue on multiple
thread entry/exits.
This change cuts across both LLVM and compiler-rt to allow us to update
both the XRay runtime implementation as well as the library support for
loading these new versions of the FDR mode logging. Version 2 of the FDR
logging implementation makes the following changes:
* Introduction of a new 'BufferExtents' metadata record that's outside
of the buffer's contents but are written before the actual buffer.
This data is associated to the Buffer handed out by the BufferQueue
rather than a record that occupies bytes in the actual buffer.
* Removal of the "end of buffer" records. This is in-line with the
changes we described above, to allow for optimistic logging without
explicit record writing at thread exit.
The optimistic logging model operates under the following assumptions:
* Threads writing to the buffers will potentially race with the thread
attempting to flush the log. To avoid this situation from occuring,
we make sure that when we've finalized the logging implementation,
that threads will see this finalization state on the next write, and
either choose to not write records the thread would have written or
write the record(s) in two phases -- first write the record(s), then
update the extents metadata.
* We change the buffer queue implementation so that once it's handed
out a buffer to a thread, that we assume that buffer is marked
"used" to be able to capture partial writes. None of this will be
safe to handle if threads are racing to write the extents records
and the reader thread is attempting to flush the log. The optimism
comes from the finalization routine being required to complete
before we attempt to flush the log.
This is a fairly significant semantics change for the FDR
implementation. This is why we've decided to update the version number
for FDR mode logs. The tools, however, still need to be able to support
older versions of the log until we finally deprecate those earlier
versions.
Reviewers: dblaikie, pelikan, kpw
Subscribers: llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D39526
llvm-svn: 318733
2017-11-21 15:16:57 +08:00
|
|
|
if (LiveBuffers == BufferCount)
|
|
|
|
return ErrorCode::NotEnoughMemory;
|
2017-10-04 13:20:13 +08:00
|
|
|
|
|
|
|
auto &T = *Next;
|
2017-10-24 10:43:49 +08:00
|
|
|
auto &B = T.Buff;
|
[XRay][compiler-rt] XRay Flight Data Recorder Mode
Summary:
In this change we introduce the notion of a "flight data recorder" mode
for XRay logging, where XRay logs in-memory first, and write out data
on-demand as required (as opposed to the naive implementation that keeps
logging while tracing is "on"). This depends on D26232 where we
implement the core data structure for holding the buffers that threads
will be using to write out records of operation.
This implementation only currently works on x86_64 and depends heavily
on the TSC math to write out smaller records to the inmemory buffers.
Also, this implementation defines two different kinds of records with
different sizes (compared to the current naive implementation): a
MetadataRecord (16 bytes) and a FunctionRecord (8 bytes). MetadataRecord
entries are meant to write out information like the thread ID for which
the metadata record is defined for, whether the execution of a thread
moved to a different CPU, etc. while a FunctionRecord represents the
different kinds of function call entry/exit records we might encounter
in the course of a thread's execution along with a delta from the last
time the logging handler was called.
While this implementation is not exactly what is described in the
original XRay whitepaper, this one gives us an initial implementation
that we can iterate and build upon.
Reviewers: echristo, rSerge, majnemer
Subscribers: mehdi_amini, llvm-commits, mgorny
Differential Revision: https://reviews.llvm.org/D27038
llvm-svn: 293015
2017-01-25 11:50:46 +08:00
|
|
|
Buf = B;
|
[XRay] Use optimistic logging model for FDR mode
Summary:
Before this change, the FDR mode implementation relied on at thread-exit
handling to return buffers back to the (global) buffer queue. This
introduces issues with the initialisation of the thread_local objects
which, even through the use of pthread_setspecific(...) may eventually
call into an allocation function. Similar to previous changes in this
line, we're finding that there is a huge potential for deadlocks when
initialising these thread-locals when the memory allocation
implementation is also xray-instrumented.
In this change, we limit the call to pthread_setspecific(...) to provide
a non-null value to associate to the key created with
pthread_key_create(...). While this doesn't completely eliminate the
potential for the deadlock(s), it does allow us to still clean up at
thread exit when we need to. The change is that we don't need to do more
work when starting and ending a thread's lifetime. We also have a test
to make sure that we actually can safely recycle the buffers in case we
end up re-using the buffer(s) available from the queue on multiple
thread entry/exits.
This change cuts across both LLVM and compiler-rt to allow us to update
both the XRay runtime implementation as well as the library support for
loading these new versions of the FDR mode logging. Version 2 of the FDR
logging implementation makes the following changes:
* Introduction of a new 'BufferExtents' metadata record that's outside
of the buffer's contents but are written before the actual buffer.
This data is associated to the Buffer handed out by the BufferQueue
rather than a record that occupies bytes in the actual buffer.
* Removal of the "end of buffer" records. This is in-line with the
changes we described above, to allow for optimistic logging without
explicit record writing at thread exit.
The optimistic logging model operates under the following assumptions:
* Threads writing to the buffers will potentially race with the thread
attempting to flush the log. To avoid this situation from occuring,
we make sure that when we've finalized the logging implementation,
that threads will see this finalization state on the next write, and
either choose to not write records the thread would have written or
write the record(s) in two phases -- first write the record(s), then
update the extents metadata.
* We change the buffer queue implementation so that once it's handed
out a buffer to a thread, that we assume that buffer is marked
"used" to be able to capture partial writes. None of this will be
safe to handle if threads are racing to write the extents records
and the reader thread is attempting to flush the log. The optimism
comes from the finalization routine being required to complete
before we attempt to flush the log.
This is a fairly significant semantics change for the FDR
implementation. This is why we've decided to update the version number
for FDR mode logs. The tools, however, still need to be able to support
older versions of the log until we finally deprecate those earlier
versions.
Reviewers: dblaikie, pelikan, kpw
Subscribers: llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D39526
llvm-svn: 318733
2017-11-21 15:16:57 +08:00
|
|
|
T.Used = true;
|
2017-10-04 13:20:13 +08:00
|
|
|
++LiveBuffers;
|
|
|
|
|
[XRay] Use optimistic logging model for FDR mode
Summary:
Before this change, the FDR mode implementation relied on at thread-exit
handling to return buffers back to the (global) buffer queue. This
introduces issues with the initialisation of the thread_local objects
which, even through the use of pthread_setspecific(...) may eventually
call into an allocation function. Similar to previous changes in this
line, we're finding that there is a huge potential for deadlocks when
initialising these thread-locals when the memory allocation
implementation is also xray-instrumented.
In this change, we limit the call to pthread_setspecific(...) to provide
a non-null value to associate to the key created with
pthread_key_create(...). While this doesn't completely eliminate the
potential for the deadlock(s), it does allow us to still clean up at
thread exit when we need to. The change is that we don't need to do more
work when starting and ending a thread's lifetime. We also have a test
to make sure that we actually can safely recycle the buffers in case we
end up re-using the buffer(s) available from the queue on multiple
thread entry/exits.
This change cuts across both LLVM and compiler-rt to allow us to update
both the XRay runtime implementation as well as the library support for
loading these new versions of the FDR mode logging. Version 2 of the FDR
logging implementation makes the following changes:
* Introduction of a new 'BufferExtents' metadata record that's outside
of the buffer's contents but are written before the actual buffer.
This data is associated to the Buffer handed out by the BufferQueue
rather than a record that occupies bytes in the actual buffer.
* Removal of the "end of buffer" records. This is in-line with the
changes we described above, to allow for optimistic logging without
explicit record writing at thread exit.
The optimistic logging model operates under the following assumptions:
* Threads writing to the buffers will potentially race with the thread
attempting to flush the log. To avoid this situation from occuring,
we make sure that when we've finalized the logging implementation,
that threads will see this finalization state on the next write, and
either choose to not write records the thread would have written or
write the record(s) in two phases -- first write the record(s), then
update the extents metadata.
* We change the buffer queue implementation so that once it's handed
out a buffer to a thread, that we assume that buffer is marked
"used" to be able to capture partial writes. None of this will be
safe to handle if threads are racing to write the extents records
and the reader thread is attempting to flush the log. The optimism
comes from the finalization routine being required to complete
before we attempt to flush the log.
This is a fairly significant semantics change for the FDR
implementation. This is why we've decided to update the version number
for FDR mode logs. The tools, however, still need to be able to support
older versions of the log until we finally deprecate those earlier
versions.
Reviewers: dblaikie, pelikan, kpw
Subscribers: llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D39526
llvm-svn: 318733
2017-11-21 15:16:57 +08:00
|
|
|
if (++Next == (Buffers + BufferCount))
|
|
|
|
Next = Buffers;
|
2017-10-04 13:20:13 +08:00
|
|
|
|
2017-03-22 12:40:32 +08:00
|
|
|
return ErrorCode::Ok;
|
2016-12-06 14:24:08 +08:00
|
|
|
}
|
|
|
|
|
2017-03-22 12:40:32 +08:00
|
|
|
BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) {
|
2017-10-04 13:20:13 +08:00
|
|
|
// Blitz through the buffers array to find the buffer.
|
2017-10-24 09:39:59 +08:00
|
|
|
bool Found = false;
|
|
|
|
for (auto I = OwnedBuffers, E = OwnedBuffers + BufferCount; I != E; ++I) {
|
2018-02-10 17:07:34 +08:00
|
|
|
if (*I == Buf.Data) {
|
2017-10-24 09:39:59 +08:00
|
|
|
Found = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
[XRay] Use optimistic logging model for FDR mode
Summary:
Before this change, the FDR mode implementation relied on at thread-exit
handling to return buffers back to the (global) buffer queue. This
introduces issues with the initialisation of the thread_local objects
which, even through the use of pthread_setspecific(...) may eventually
call into an allocation function. Similar to previous changes in this
line, we're finding that there is a huge potential for deadlocks when
initialising these thread-locals when the memory allocation
implementation is also xray-instrumented.
In this change, we limit the call to pthread_setspecific(...) to provide
a non-null value to associate to the key created with
pthread_key_create(...). While this doesn't completely eliminate the
potential for the deadlock(s), it does allow us to still clean up at
thread exit when we need to. The change is that we don't need to do more
work when starting and ending a thread's lifetime. We also have a test
to make sure that we actually can safely recycle the buffers in case we
end up re-using the buffer(s) available from the queue on multiple
thread entry/exits.
This change cuts across both LLVM and compiler-rt to allow us to update
both the XRay runtime implementation as well as the library support for
loading these new versions of the FDR mode logging. Version 2 of the FDR
logging implementation makes the following changes:
* Introduction of a new 'BufferExtents' metadata record that's outside
of the buffer's contents but are written before the actual buffer.
This data is associated to the Buffer handed out by the BufferQueue
rather than a record that occupies bytes in the actual buffer.
* Removal of the "end of buffer" records. This is in-line with the
changes we described above, to allow for optimistic logging without
explicit record writing at thread exit.
The optimistic logging model operates under the following assumptions:
* Threads writing to the buffers will potentially race with the thread
attempting to flush the log. To avoid this situation from occuring,
we make sure that when we've finalized the logging implementation,
that threads will see this finalization state on the next write, and
either choose to not write records the thread would have written or
write the record(s) in two phases -- first write the record(s), then
update the extents metadata.
* We change the buffer queue implementation so that once it's handed
out a buffer to a thread, that we assume that buffer is marked
"used" to be able to capture partial writes. None of this will be
safe to handle if threads are racing to write the extents records
and the reader thread is attempting to flush the log. The optimism
comes from the finalization routine being required to complete
before we attempt to flush the log.
This is a fairly significant semantics change for the FDR
implementation. This is why we've decided to update the version number
for FDR mode logs. The tools, however, still need to be able to support
older versions of the log until we finally deprecate those earlier
versions.
Reviewers: dblaikie, pelikan, kpw
Subscribers: llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D39526
llvm-svn: 318733
2017-11-21 15:16:57 +08:00
|
|
|
if (!Found)
|
|
|
|
return ErrorCode::UnrecognizedBuffer;
|
2017-10-24 09:39:59 +08:00
|
|
|
|
2018-06-05 14:12:42 +08:00
|
|
|
SpinMutexLock Guard(&Mutex);
|
2017-10-04 13:20:13 +08:00
|
|
|
|
|
|
|
// This points to a semantic bug, we really ought to not be releasing more
|
|
|
|
// buffers than we actually get.
|
[XRay] Use optimistic logging model for FDR mode
Summary:
Before this change, the FDR mode implementation relied on at thread-exit
handling to return buffers back to the (global) buffer queue. This
introduces issues with the initialisation of the thread_local objects
which, even through the use of pthread_setspecific(...) may eventually
call into an allocation function. Similar to previous changes in this
line, we're finding that there is a huge potential for deadlocks when
initialising these thread-locals when the memory allocation
implementation is also xray-instrumented.
In this change, we limit the call to pthread_setspecific(...) to provide
a non-null value to associate to the key created with
pthread_key_create(...). While this doesn't completely eliminate the
potential for the deadlock(s), it does allow us to still clean up at
thread exit when we need to. The change is that we don't need to do more
work when starting and ending a thread's lifetime. We also have a test
to make sure that we actually can safely recycle the buffers in case we
end up re-using the buffer(s) available from the queue on multiple
thread entry/exits.
This change cuts across both LLVM and compiler-rt to allow us to update
both the XRay runtime implementation as well as the library support for
loading these new versions of the FDR mode logging. Version 2 of the FDR
logging implementation makes the following changes:
* Introduction of a new 'BufferExtents' metadata record that's outside
of the buffer's contents but are written before the actual buffer.
This data is associated to the Buffer handed out by the BufferQueue
rather than a record that occupies bytes in the actual buffer.
* Removal of the "end of buffer" records. This is in-line with the
changes we described above, to allow for optimistic logging without
explicit record writing at thread exit.
The optimistic logging model operates under the following assumptions:
* Threads writing to the buffers will potentially race with the thread
attempting to flush the log. To avoid this situation from occuring,
we make sure that when we've finalized the logging implementation,
that threads will see this finalization state on the next write, and
either choose to not write records the thread would have written or
write the record(s) in two phases -- first write the record(s), then
update the extents metadata.
* We change the buffer queue implementation so that once it's handed
out a buffer to a thread, that we assume that buffer is marked
"used" to be able to capture partial writes. None of this will be
safe to handle if threads are racing to write the extents records
and the reader thread is attempting to flush the log. The optimism
comes from the finalization routine being required to complete
before we attempt to flush the log.
This is a fairly significant semantics change for the FDR
implementation. This is why we've decided to update the version number
for FDR mode logs. The tools, however, still need to be able to support
older versions of the log until we finally deprecate those earlier
versions.
Reviewers: dblaikie, pelikan, kpw
Subscribers: llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D39526
llvm-svn: 318733
2017-11-21 15:16:57 +08:00
|
|
|
if (LiveBuffers == 0)
|
|
|
|
return ErrorCode::NotEnoughMemory;
|
[XRay][compiler-rt] XRay Flight Data Recorder Mode
Summary:
In this change we introduce the notion of a "flight data recorder" mode
for XRay logging, where XRay logs in-memory first, and write out data
on-demand as required (as opposed to the naive implementation that keeps
logging while tracing is "on"). This depends on D26232 where we
implement the core data structure for holding the buffers that threads
will be using to write out records of operation.
This implementation only currently works on x86_64 and depends heavily
on the TSC math to write out smaller records to the inmemory buffers.
Also, this implementation defines two different kinds of records with
different sizes (compared to the current naive implementation): a
MetadataRecord (16 bytes) and a FunctionRecord (8 bytes). MetadataRecord
entries are meant to write out information like the thread ID for which
the metadata record is defined for, whether the execution of a thread
moved to a different CPU, etc. while a FunctionRecord represents the
different kinds of function call entry/exit records we might encounter
in the course of a thread's execution along with a delta from the last
time the logging handler was called.
While this implementation is not exactly what is described in the
original XRay whitepaper, this one gives us an initial implementation
that we can iterate and build upon.
Reviewers: echristo, rSerge, majnemer
Subscribers: mehdi_amini, llvm-commits, mgorny
Differential Revision: https://reviews.llvm.org/D27038
llvm-svn: 293015
2017-01-25 11:50:46 +08:00
|
|
|
|
|
|
|
// Now that the buffer has been released, we mark it as "used".
|
2017-10-24 10:36:32 +08:00
|
|
|
First->Buff = Buf;
|
2017-10-24 09:39:59 +08:00
|
|
|
First->Used = true;
|
2018-02-10 17:07:34 +08:00
|
|
|
Buf.Data = nullptr;
|
[XRay][compiler-rt] XRay Flight Data Recorder Mode
Summary:
In this change we introduce the notion of a "flight data recorder" mode
for XRay logging, where XRay logs in-memory first, and write out data
on-demand as required (as opposed to the naive implementation that keeps
logging while tracing is "on"). This depends on D26232 where we
implement the core data structure for holding the buffers that threads
will be using to write out records of operation.
This implementation only currently works on x86_64 and depends heavily
on the TSC math to write out smaller records to the in-memory buffers.
Also, this implementation defines two different kinds of records with
different sizes (compared to the current naive implementation): a
MetadataRecord (16 bytes) and a FunctionRecord (8 bytes). MetadataRecord
entries are meant to write out information like the thread ID for which
the metadata record is defined for, whether the execution of a thread
moved to a different CPU, etc. while a FunctionRecord represents the
different kinds of function call entry/exit records we might encounter
in the course of a thread's execution along with a delta from the last
time the logging handler was called.
While this implementation is not exactly what is described in the
original XRay whitepaper, this one gives us an initial implementation
that we can iterate and build upon.
Reviewers: echristo, rSerge, majnemer
Subscribers: mehdi_amini, llvm-commits, mgorny
Differential Revision: https://reviews.llvm.org/D27038
llvm-svn: 293015
2017-01-25 11:50:46 +08:00
|
|
|
Buf.Size = 0;
|
2017-10-04 13:20:13 +08:00
|
|
|
--LiveBuffers;
|
[XRay] Use optimistic logging model for FDR mode
Summary:
Before this change, the FDR mode implementation relied on thread-exit
handling to return buffers back to the (global) buffer queue. This
introduces issues with the initialisation of the thread_local objects
which, even through the use of pthread_setspecific(...) may eventually
call into an allocation function. Similar to previous changes in this
line, we're finding that there is a huge potential for deadlocks when
initialising these thread-locals when the memory allocation
implementation is also xray-instrumented.
In this change, we limit the call to pthread_setspecific(...) to provide
a non-null value to associate to the key created with
pthread_key_create(...). While this doesn't completely eliminate the
potential for the deadlock(s), it does allow us to still clean up at
thread exit when we need to. The change is that we don't need to do more
work when starting and ending a thread's lifetime. We also have a test
to make sure that we actually can safely recycle the buffers in case we
end up re-using the buffer(s) available from the queue on multiple
thread entry/exits.
This change cuts across both LLVM and compiler-rt to allow us to update
both the XRay runtime implementation as well as the library support for
loading these new versions of the FDR mode logging. Version 2 of the FDR
logging implementation makes the following changes:
* Introduction of a new 'BufferExtents' metadata record that's outside
of the buffer's contents but are written before the actual buffer.
This data is associated to the Buffer handed out by the BufferQueue
rather than a record that occupies bytes in the actual buffer.
* Removal of the "end of buffer" records. This is in-line with the
changes we described above, to allow for optimistic logging without
explicit record writing at thread exit.
The optimistic logging model operates under the following assumptions:
* Threads writing to the buffers will potentially race with the thread
attempting to flush the log. To prevent this situation from occurring,
we make sure that when we've finalized the logging implementation,
that threads will see this finalization state on the next write, and
either choose to not write records the thread would have written or
write the record(s) in two phases -- first write the record(s), then
update the extents metadata.
* We change the buffer queue implementation so that once it's handed
out a buffer to a thread, that we assume that buffer is marked
"used" to be able to capture partial writes. None of this will be
safe to handle if threads are racing to write the extents records
and the reader thread is attempting to flush the log. The optimism
comes from the finalization routine being required to complete
before we attempt to flush the log.
This is a fairly significant semantics change for the FDR
implementation. This is why we've decided to update the version number
for FDR mode logs. The tools, however, still need to be able to support
older versions of the log until we finally deprecate those earlier
versions.
Reviewers: dblaikie, pelikan, kpw
Subscribers: llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D39526
llvm-svn: 318733
2017-11-21 15:16:57 +08:00
|
|
|
if (++First == (Buffers + BufferCount))
|
|
|
|
First = Buffers;
|
2017-10-04 14:02:12 +08:00
|
|
|
|
2017-03-22 12:40:32 +08:00
|
|
|
return ErrorCode::Ok;
|
2016-12-06 14:24:08 +08:00
|
|
|
}
|
|
|
|
|
2017-03-22 12:40:32 +08:00
|
|
|
// Sets the queue's Finalizing flag to 1 with a single atomic exchange.
//
// Returns ErrorCode::QueueFinalizing when the exchange yields a non-zero
// value, and ErrorCode::Ok otherwise.
BufferQueue::ErrorCode BufferQueue::finalize() {
  const auto ExchangeResult =
      atomic_exchange(&Finalizing, 1, memory_order_acq_rel);
  return ExchangeResult ? ErrorCode::QueueFinalizing : ErrorCode::Ok;
}
|
|
|
|
|
|
|
|
// Tears down the queue in three steps: hand each slot's Data and Extents
// storage to deallocRaw, explicitly run ~BufferRep() on every slot, and then
// hand the Buffers and OwnedBuffers arrays themselves to deallocRaw.
BufferQueue::~BufferQueue() {
  const auto End = Buffers + BufferCount;

  // First pass: release the storage referenced by each slot's buffer.
  for (auto Rep = Buffers; Rep != End; ++Rep) {
    deallocRaw(Rep->Buff.Data, Rep->Buff.Size);
    deallocRaw(Rep->Buff.Extents, 1);
  }

  // Second pass: the slots are destroyed by hand before their backing array
  // is given back below.
  for (auto Rep = Buffers; Rep != End; ++Rep)
    Rep->~BufferRep();

  deallocRaw(Buffers, BufferCount);
  deallocRaw(OwnedBuffers, BufferCount);
}
|