[XRay][compiler-rt] Remove reliance on C++ ABI features

Summary:
This fixes http://llvm.org/PR32274.

This change adds a test to ensure that we're able to link XRay modes and
the runtime to binaries that don't need to depend on the C++ standard
library or a C++ ABI library. In particular, we ensure that this will work
with C programs compiled+linked with XRay.

To make the test pass, we need to change a few things in the XRay
runtime implementations to remove the reliance on C++ ABI features. In
particular, we change the thread-safe function-local-static
initialisation to use pthread_* instead of the C++ features that ensure
non-trivial thread-local/function-local-static initialisation.

Depends on D47696.

Reviewers: dblaikie, jfb, kpw, eizan

Reviewed By: kpw

Subscribers: echristo, eizan, kpw, mgorny, llvm-commits

Differential Revision: https://reviews.llvm.org/D46998

llvm-svn: 334262
This commit is contained in:
Dean Michael Berris 2018-06-08 04:00:07 +00:00
parent 3428beeb2f
commit af4d583ea8
3 changed files with 141 additions and 114 deletions

View File

@ -15,8 +15,6 @@
//
//===----------------------------------------------------------------------===//
#include <cassert>
#include <cstring>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
@ -73,24 +71,21 @@ BasicLoggingOptions GlobalOptions;
thread_local atomic_uint8_t Guard{0};
static uint64_t thresholdTicks() XRAY_NEVER_INSTRUMENT {
static uint64_t TicksPerSec = probeRequiredCPUFeatures()
? getTSCFrequency()
: NanosecondsPerSecond;
static const uint64_t ThresholdTicks =
TicksPerSec * GlobalOptions.DurationFilterMicros / 1000000;
return ThresholdTicks;
}
static atomic_uint8_t UseRealTSC{0};
static atomic_uint64_t ThresholdTicks{0};
static atomic_uint64_t TicksPerSec{0};
static atomic_uint64_t CycleFrequency{NanosecondsPerSecond};
static int openLogFile() XRAY_NEVER_INSTRUMENT {
int F = getLogFD();
if (F == -1)
return -1;
// Test for required CPU features and cache the cycle frequency
static bool TSCSupported = probeRequiredCPUFeatures();
static uint64_t CycleFrequency =
TSCSupported ? getTSCFrequency() : NanosecondsPerSecond;
static pthread_once_t DetectOnce = PTHREAD_ONCE_INIT;
pthread_once(&DetectOnce, +[] {
if (atomic_load(&UseRealTSC, memory_order_acquire))
atomic_store(&CycleFrequency, getTSCFrequency(), memory_order_release);
});
// Since we're here, we get to write the header. We set it up so that the
// header will only be written once, at the start, and let the threads
@ -98,7 +93,7 @@ static int openLogFile() XRAY_NEVER_INSTRUMENT {
XRayFileHeader Header;
Header.Version = 2; // Version 2 includes tail exit records.
Header.Type = FileTypes::NAIVE_LOG;
Header.CycleFrequency = CycleFrequency;
Header.CycleFrequency = atomic_load(&CycleFrequency, memory_order_acquire);
// FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
// before setting the values in the header.
@ -109,12 +104,14 @@ static int openLogFile() XRAY_NEVER_INSTRUMENT {
return F;
}
int getGlobalFd() XRAY_NEVER_INSTRUMENT {
static int Fd = openLogFile();
static int getGlobalFd() XRAY_NEVER_INSTRUMENT {
static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
static int Fd = 0;
pthread_once(&OnceInit, +[] { Fd = openLogFile(); });
return Fd;
}
ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
static ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
thread_local ThreadLocalData TLD;
thread_local bool UNUSED TOnce = [] {
if (GlobalOptions.ThreadBufferSize == 0) {
@ -143,13 +140,6 @@ ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
alignof(StackEntry)));
TLD.StackSize = GlobalOptions.MaxStackDepth;
TLD.StackEntries = 0;
if (Verbosity() >= 2) {
static auto UNUSED Once = [] {
auto ticks = thresholdTicks();
Report("Ticks threshold: %d\n", ticks);
return false;
}();
}
return false;
}();
return TLD;
@ -217,8 +207,8 @@ void InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
if (StackTop.FuncId == FuncId && StackTop.CPU == CPU &&
StackTop.TSC < TSC) {
auto Delta = TSC - StackTop.TSC;
if (Delta < thresholdTicks()) {
assert(TLD.BufferOffset > 0);
if (Delta < atomic_load(&ThresholdTicks, memory_order_relaxed)) {
DCHECK(TLD.BufferOffset > 0);
TLD.BufferOffset -= StackTop.Type == XRayEntryType::ENTRY ? 1 : 2;
return;
}
@ -227,7 +217,7 @@ void InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
}
default:
// Should be unreachable.
assert(false && "Unsupported XRayEntryType encountered.");
DCHECK(false && "Unsupported XRayEntryType encountered.");
break;
}
@ -372,17 +362,26 @@ XRayLogInitStatus basicLoggingInit(size_t BufferSize, size_t BufferMax,
void *Options,
size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
uint8_t Expected = 0;
if (!atomic_compare_exchange_strong(
&BasicInitialized, &Expected, 1, memory_order_acq_rel)) {
if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 1,
memory_order_acq_rel)) {
if (Verbosity())
Report("Basic logging already initialized.\n");
return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
}
static bool UNUSED Once = [] {
static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
pthread_once(&OnceInit, +[] {
pthread_key_create(&PThreadKey, TLDDestructor);
return false;
}();
atomic_store(&UseRealTSC, probeRequiredCPUFeatures(), memory_order_release);
// Initialize the global TicksPerSec value.
atomic_store(&TicksPerSec,
probeRequiredCPUFeatures() ? getTSCFrequency()
: NanosecondsPerSecond,
memory_order_release);
if (!atomic_load(&UseRealTSC, memory_order_relaxed) && Verbosity())
Report("WARNING: Required CPU features missing for XRay instrumentation, "
"using emulation instead.\n");
});
if (BufferSize == 0 && BufferMax == 0 && Options != nullptr) {
FlagParser P;
@ -409,6 +408,7 @@ XRayLogInitStatus basicLoggingInit(size_t BufferSize, size_t BufferMax,
GlobalOptions.ThreadBufferSize = F.thread_buffer_size;
GlobalOptions.DurationFilterMicros = F.func_duration_threshold_us;
GlobalOptions.MaxStackDepth = F.max_stack_depth;
*basicFlags() = F;
} else if (OptionsSize != sizeof(BasicLoggingOptions)) {
Report("Invalid options size, potential ABI mismatch; expected %d got %d",
sizeof(BasicLoggingOptions), OptionsSize);
@ -420,15 +420,19 @@ XRayLogInitStatus basicLoggingInit(size_t BufferSize, size_t BufferMax,
GlobalOptions = *reinterpret_cast<BasicLoggingOptions *>(Options);
}
static auto UseRealTSC = probeRequiredCPUFeatures();
if (!UseRealTSC && Verbosity())
Report("WARNING: Required CPU features missing for XRay instrumentation, "
"using emulation instead.\n");
atomic_store(&ThresholdTicks,
atomic_load(&TicksPerSec, memory_order_acquire) *
GlobalOptions.DurationFilterMicros / 1000000,
memory_order_release);
__xray_set_handler_arg1(atomic_load(&UseRealTSC, memory_order_acquire)
? basicLoggingHandleArg1RealTSC
: basicLoggingHandleArg1EmulateTSC);
__xray_set_handler(atomic_load(&UseRealTSC, memory_order_acquire)
? basicLoggingHandleArg0RealTSC
: basicLoggingHandleArg0EmulateTSC);
__xray_set_handler_arg1(UseRealTSC ? basicLoggingHandleArg1RealTSC
: basicLoggingHandleArg1EmulateTSC);
__xray_set_handler(UseRealTSC ? basicLoggingHandleArg0RealTSC
: basicLoggingHandleArg0EmulateTSC);
// TODO: Implement custom event and typed event handling support in Basic
// Mode.
__xray_remove_customevent_handler();
__xray_remove_typedevent_handler();
@ -437,8 +441,8 @@ XRayLogInitStatus basicLoggingInit(size_t BufferSize, size_t BufferMax,
XRayLogInitStatus basicLoggingFinalize() XRAY_NEVER_INSTRUMENT {
uint8_t Expected = 0;
if (!atomic_compare_exchange_strong(
&BasicInitialized, &Expected, 0, memory_order_acq_rel) &&
if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 0,
memory_order_acq_rel) &&
Verbosity())
Report("Basic logging already finalized.\n");
@ -490,11 +494,18 @@ bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
Report("Failed initializing XRay Basic Mode; error = %d\n", InitResult);
return false;
}
static auto UNUSED Once = [] {
static auto UNUSED &TLD = getThreadLocalData();
Atexit(+[] { TLDDestructor(&TLD); });
return false;
}();
// At this point we know that we've successfully initialized Basic mode
// tracing, and the only chance we're going to get for the current thread to
// clean-up may be at thread/program exit. To ensure that we're going to get
// the cleanup even without calling the finalization routines, we're
// registering a program exit function that will do the cleanup.
static pthread_once_t DynamicOnce = PTHREAD_ONCE_INIT;
pthread_once(&DynamicOnce, +[] {
static void *FakeTLD = nullptr;
FakeTLD = &getThreadLocalData();
Atexit(+[] { TLDDestructor(FakeTLD); });
});
}
return true;
}

View File

@ -15,9 +15,10 @@
//
//===----------------------------------------------------------------------===//
#include "xray_fdr_logging.h"
#include <cassert>
#include <errno.h>
#include <limits>
#include <pthread.h>
#include <memory>
#include <sys/syscall.h>
#include <sys/time.h>
#include <time.h>
@ -28,11 +29,11 @@
#include "sanitizer_common/sanitizer_common.h"
#include "xray/xray_interface.h"
#include "xray/xray_records.h"
#include "xray_recursion_guard.h"
#include "xray_buffer_queue.h"
#include "xray_defs.h"
#include "xray_fdr_flags.h"
#include "xray_flags.h"
#include "xray_recursion_guard.h"
#include "xray_tsc.h"
#include "xray_utils.h"
@ -291,23 +292,19 @@ static void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
break;
case XRayEntryType::CUSTOM_EVENT: {
// This is a bug in patching, so we'll report it once and move on.
static bool Once = [&] {
static atomic_uint8_t ErrorLatch{0};
if (!atomic_exchange(&ErrorLatch, 1, memory_order_acq_rel))
Report("Internal error: patched an XRay custom event call as a function; "
"func id = %d\n",
FuncId);
return true;
}();
(void)Once;
return;
}
case XRayEntryType::TYPED_EVENT: {
static bool Once = [&] {
static atomic_uint8_t ErrorLatch{0};
if (!atomic_exchange(&ErrorLatch, 1, memory_order_acq_rel))
Report("Internal error: patched an XRay typed event call as a function; "
"func id = %d\n",
FuncId);
return true;
}();
(void)Once;
return;
}
}
@ -317,14 +314,8 @@ static void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
incrementExtents(sizeof(FunctionRecord));
}
static uint64_t thresholdTicks() {
static uint64_t TicksPerSec = probeRequiredCPUFeatures()
? getTSCFrequency()
: __xray::NanosecondsPerSecond;
static const uint64_t ThresholdTicks =
TicksPerSec * fdrFlags()->func_duration_threshold_us / 1000000;
return ThresholdTicks;
}
static atomic_uint64_t TicksPerSec{0};
static atomic_uint64_t ThresholdTicks{0};
// Re-point the thread local pointer into this thread's Buffer before the recent
// "Function Entry" record and any "Tail Call Exit" records after that.
@ -375,7 +366,7 @@ static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
"Expected funcids to match when rewinding tail call.");
// This tail call exceeded the threshold duration. It will not be erased.
if ((TSC - RewindingTSC) >= thresholdTicks()) {
if ((TSC - RewindingTSC) >= atomic_load_relaxed(&ThresholdTicks)) {
TLD.NumTailCalls = 0;
return;
}
@ -619,29 +610,26 @@ static void processFunctionHook(int32_t FuncId, XRayEntryType Entry,
case XRayEntryType::EXIT:
// Break out and write the exit record if we can't erase any functions.
if (TLD.NumConsecutiveFnEnters == 0 ||
(TSC - TLD.LastFunctionEntryTSC) >= thresholdTicks())
(TSC - TLD.LastFunctionEntryTSC) >=
atomic_load_relaxed(&ThresholdTicks))
break;
rewindRecentCall(TSC, TLD.LastTSC, TLD.LastFunctionEntryTSC, FuncId);
return; // without writing log.
case XRayEntryType::CUSTOM_EVENT: {
// This is a bug in patching, so we'll report it once and move on.
static bool Once = [&] {
static atomic_uint8_t ErrorLatch{0};
if (!atomic_exchange(&ErrorLatch, 1, memory_order_acq_rel))
Report("Internal error: patched an XRay custom event call as a function; "
"func id = %d",
"func id = %d\n",
FuncId);
return true;
}();
(void)Once;
return;
}
case XRayEntryType::TYPED_EVENT: {
static bool Once = [&] {
static atomic_uint8_t ErrorLatch{0};
if (!atomic_exchange(&ErrorLatch, 1, memory_order_acq_rel))
Report("Internal error: patched an XRay typed event call as a function; "
"func id = %d\n",
FuncId);
return true;
}();
(void)Once;
return;
}
}
@ -665,32 +653,32 @@ FDRLoggingOptions FDROptions;
SpinMutex FDROptionsMutex;
namespace {
XRayFileHeader &fdrCommonHeaderInfo() {
static XRayFileHeader Header = [] {
XRayFileHeader H;
static XRayFileHeader &fdrCommonHeaderInfo() {
static std::aligned_storage<sizeof(XRayFileHeader)>::type HStorage;
static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
static bool TSCSupported = true;
static uint64_t CycleFrequency = NanosecondsPerSecond;
pthread_once(&OnceInit, +[] {
XRayFileHeader &H = reinterpret_cast<XRayFileHeader &>(HStorage);
// Version 2 of the log writes the extents of the buffer, instead of
// relying on an end-of-buffer record.
H.Version = 2;
H.Type = FileTypes::FDR_LOG;
// Test for required CPU features and cache the cycle frequency
static bool TSCSupported = probeRequiredCPUFeatures();
static uint64_t CycleFrequency =
TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond;
TSCSupported = probeRequiredCPUFeatures();
if (TSCSupported)
CycleFrequency = getTSCFrequency();
H.CycleFrequency = CycleFrequency;
// FIXME: Actually check whether we have 'constant_tsc' and
// 'nonstop_tsc' before setting the values in the header.
H.ConstantTSC = 1;
H.NonstopTSC = 1;
return H;
}();
return Header;
});
return reinterpret_cast<XRayFileHeader &>(HStorage);
}
} // namespace
// This is the iterator implementation, which knows how to handle FDR-mode
// specific buffers. This is used as an implementation of the iterator function
// needed by __xray_set_buffer_iterator(...). It maintains a global state of the
@ -723,7 +711,14 @@ XRayBuffer fdrIterator(const XRayBuffer B) {
// initialized the first time this function is called. We'll update one part
// of this information with some relevant data (in particular the number of
// buffers to expect).
static XRayFileHeader Header = fdrCommonHeaderInfo();
static std::aligned_storage<sizeof(XRayFileHeader)>::type HeaderStorage;
static pthread_once_t HeaderOnce = PTHREAD_ONCE_INIT;
pthread_once(&HeaderOnce, +[] {
reinterpret_cast<XRayFileHeader &>(HeaderStorage) = fdrCommonHeaderInfo();
});
// We use a convenience alias for code referring to Header from here on out.
auto &Header = reinterpret_cast<XRayFileHeader &>(HeaderStorage);
if (B.Data == nullptr && B.Size == 0) {
Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()};
return XRayBuffer{static_cast<void *>(&Header), sizeof(Header)};
@ -732,7 +727,6 @@ XRayBuffer fdrIterator(const XRayBuffer B) {
static BufferQueue::const_iterator It{};
static BufferQueue::const_iterator End{};
if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) {
// From this point on, we provide raw access to the raw buffer we're getting
// from the BufferQueue. We're relying on the iterators from the current
// Buffer queue.
@ -787,8 +781,10 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n");
// Clean up the buffer queue, and do not bother writing out the files!
delete BQ;
BQ->~BufferQueue();
InternalFree(BQ);
BQ = nullptr;
atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
memory_order_release);
return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
@ -881,7 +877,9 @@ static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT {
TSCAndCPU Result;
// Test once for required CPU features
static bool TSCSupported = probeRequiredCPUFeatures();
static pthread_once_t OnceProbe = PTHREAD_ONCE_INIT;
static bool TSCSupported = true;
pthread_once(&OnceProbe, +[] { TSCSupported = probeRequiredCPUFeatures(); });
if (TSCSupported) {
Result.TSC = __xray::readTSC(Result.CPU);
@ -920,13 +918,8 @@ void fdrLoggingHandleCustomEvent(void *Event,
if (!Guard)
return;
if (EventSize > std::numeric_limits<int32_t>::max()) {
using Empty = struct {};
static Empty Once = [&] {
Report("Event size too large = %zu ; > max = %d\n", EventSize,
std::numeric_limits<int32_t>::max());
return Empty();
}();
(void)Once;
static pthread_once_t Once = PTHREAD_ONCE_INIT;
pthread_once(&Once, +[] { Report("Event size too large.\n"); });
}
int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
auto &TLD = getThreadLocalData();
@ -970,13 +963,8 @@ void fdrLoggingHandleTypedEvent(
if (!Guard)
return;
if (EventSize > std::numeric_limits<int32_t>::max()) {
using Empty = struct {};
static Empty Once = [&] {
Report("Event size too large = %zu ; > max = %d\n", EventSize,
std::numeric_limits<int32_t>::max());
return Empty();
}();
(void)Once;
static pthread_once_t Once = PTHREAD_ONCE_INIT;
pthread_once(&Once, +[] { Report("Event size too large.\n"); });
}
int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
auto &TLD = getThreadLocalData();
@ -1093,23 +1081,33 @@ XRayLogInitStatus fdrLoggingInit(size_t BufferSize, size_t BufferMax,
bool Success = false;
if (BQ != nullptr) {
delete BQ;
BQ->~BufferQueue();
InternalFree(BQ);
BQ = nullptr;
}
if (BQ == nullptr)
BQ = new BufferQueue(BufferSize, BufferMax, Success);
if (BQ == nullptr) {
BQ = reinterpret_cast<BufferQueue *>(
InternalAlloc(sizeof(BufferQueue), nullptr, 64));
new (BQ) BufferQueue(BufferSize, BufferMax, Success);
}
if (!Success) {
Report("BufferQueue init failed.\n");
if (BQ != nullptr) {
delete BQ;
BQ->~BufferQueue();
InternalFree(BQ);
BQ = nullptr;
}
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
}
static bool UNUSED Once = [] {
static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
pthread_once(&OnceInit, +[] {
atomic_store(&TicksPerSec,
probeRequiredCPUFeatures() ? getTSCFrequency()
: __xray::NanosecondsPerSecond,
memory_order_release);
pthread_key_create(&Key, +[](void *) {
auto &TLD = getThreadLocalData();
if (TLD.BQ == nullptr)
@ -1119,9 +1117,12 @@ XRayLogInitStatus fdrLoggingInit(size_t BufferSize, size_t BufferMax,
Report("At thread exit, failed to release buffer at %p; error=%s\n",
TLD.Buffer.Data, BufferQueue::getErrorString(EC));
});
return false;
}();
});
atomic_store(&ThresholdTicks,
atomic_load_relaxed(&TicksPerSec) *
fdrFlags()->func_duration_threshold_us / 1000000,
memory_order_release);
// Arg1 handler should go in first to avoid concurrent code accidentally
// falling back to arg0 when it should have ran arg1.
__xray_set_handler_arg1(fdrLoggingHandleArg1);

View File

@ -0,0 +1,15 @@
// RUN: %clang_xray -g -o %t %s
// RUN: rm xray-log.c-test.* || true
// RUN: XRAY_OPTIONS=patch_premain=true:verbosity=1:xray_mode=xray-basic %t \
// RUN: 2>&1 | FileCheck %s
// RUN: rm xray-log.c-test.* || true
//
// REQUIRES: x86_64-target-arch
// REQUIRES: built-in-llvm-tree
__attribute__((xray_always_instrument)) void always() {}
int main() {
always();
}
// CHECK: =={{[0-9].*}}==XRay: Log file in '{{.*}}'