[XRay][compiler-rt] Write out arg1 payload in naive mode logging

Summary:
This change allows the XRay basic (naive) mode logging implementation to
start writing the payload entries through the arg1 logging handler. This
implementation writes out the records that the llvm-xray tool and the
trace reader library will start processing in D38550.

This introduces a new payload record type which logs the data through
the in-memory buffer. It uses the same size/alignment that the normal
XRay record entries use. We use a new record type to indicate these new
entries, so that the trace reader library in LLVM can start reading
these entries.

Depends on D38550.

Reviewers: pelikan

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D38551

llvm-svn: 314968
This commit is contained in:
Dean Michael Berris 2017-10-05 05:45:51 +00:00
parent 0a465d7a01
commit 8dcba551d9
2 changed files with 135 additions and 15 deletions

View File

@ -67,13 +67,14 @@ static_assert(sizeof(XRayFileHeader) == 32, "XRayFileHeader != 32 bytes");
enum RecordTypes { enum RecordTypes {
NORMAL = 0, NORMAL = 0,
ARG_PAYLOAD = 1,
}; };
struct alignas(32) XRayRecord { struct alignas(32) XRayRecord {
// This is the type of the record being written. We use 16 bits to allow us to // This is the type of the record being written. We use 16 bits to allow us to
// treat this as a discriminant, and so that the first 4 bytes get packed // treat this as a discriminant, and so that the first 4 bytes get packed
// properly. See RecordTypes for more supported types. // properly. See RecordTypes for more supported types.
uint16_t RecordType = 0; uint16_t RecordType = RecordTypes::NORMAL;
// The CPU where the thread is running. We assume number of CPUs <= 256. // The CPU where the thread is running. We assume number of CPUs <= 256.
uint8_t CPU = 0; uint8_t CPU = 0;
@ -82,6 +83,7 @@ struct alignas(32) XRayRecord {
// ENTER = 0 // ENTER = 0
// EXIT = 1 // EXIT = 1
// TAIL_EXIT = 2 // TAIL_EXIT = 2
// ENTER_ARG = 3
uint8_t Type = 0; uint8_t Type = 0;
// The function ID for the record. // The function ID for the record.
@ -99,6 +101,32 @@ struct alignas(32) XRayRecord {
static_assert(sizeof(XRayRecord) == 32, "XRayRecord != 32 bytes"); static_assert(sizeof(XRayRecord) == 32, "XRayRecord != 32 bytes");
// Log record carrying the first argument of an instrumented function call.
//
// The record is packed and 32 bytes in size (enforced by the static_assert
// below), which is the same size XRayRecord is asserted to have, so both kinds
// of record can occupy slots of one buffer. Readers tell the two apart by the
// leading 16-bit RecordType discriminant.
//
// Byte layout (the struct is packed, so offsets follow directly from the
// member sizes):
//   [0, 2)   RecordType
//   [2, 4)   Padding
//   [4, 8)   FuncId
//   [8, 12)  TId
//   [12, 16) Padding2
//   [16, 24) Arg
//   [24, 32) TailPadding
struct alignas(32) XRayArgPayload {
// We use the same 16 bits as a discriminant for the records in the log here
// too, and so that the first 4 bytes are packed properly.
uint16_t RecordType = RecordTypes::ARG_PAYLOAD;
// Add a few bytes to pad; together with RecordType this fills the first 4
// bytes of the record.
uint8_t Padding[2] = {};
// The function ID for the record.
int32_t FuncId = 0;
// The thread ID for the currently running thread.
uint32_t TId = 0;
// Add more padding, so that Arg below starts at byte offset 16.
uint8_t Padding2[4] = {};
// The argument payload.
uint64_t Arg = 0;
// The rest of this record ought to be left as padding; it brings the total
// size up to 32 bytes.
uint8_t TailPadding[8] = {};
} __attribute__((packed));
// Guard the record size at compile time: any change to the fields above that
// alters the size fails the build.
static_assert(sizeof(XRayArgPayload) == 32, "XRayArgPayload != 32 bytes");
} // namespace __xray } // namespace __xray
#endif // XRAY_XRAY_RECORDS_H #endif // XRAY_XRAY_RECORDS_H

View File

@ -16,6 +16,7 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include <cassert> #include <cassert>
#include <cstring>
#include <errno.h> #include <errno.h>
#include <fcntl.h> #include <fcntl.h>
#include <sys/stat.h> #include <sys/stat.h>
@ -82,14 +83,14 @@ static int __xray_OpenLogFile() XRAY_NEVER_INSTRUMENT {
// Test for required CPU features and cache the cycle frequency // Test for required CPU features and cache the cycle frequency
static bool TSCSupported = probeRequiredCPUFeatures(); static bool TSCSupported = probeRequiredCPUFeatures();
static uint64_t CycleFrequency = TSCSupported ? getTSCFrequency() static uint64_t CycleFrequency =
: __xray::NanosecondsPerSecond; TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond;
// Since we're here, we get to write the header. We set it up so that the // Since we're here, we get to write the header. We set it up so that the
// header will only be written once, at the start, and let the threads // header will only be written once, at the start, and let the threads
// logging do writes which just append. // logging do writes which just append.
XRayFileHeader Header; XRayFileHeader Header;
Header.Version = 2; // Version 2 includes tail exit records. Header.Version = 2; // Version 2 includes tail exit records.
Header.Type = FileTypes::NAIVE_LOG; Header.Type = FileTypes::NAIVE_LOG;
Header.CycleFrequency = CycleFrequency; Header.CycleFrequency = CycleFrequency;
@ -102,26 +103,43 @@ static int __xray_OpenLogFile() XRAY_NEVER_INSTRUMENT {
return F; return F;
} }
// Raw storage for exactly one log record: same size and alignment as
// XRayRecord, but without constructing an XRayRecord in it.
using Buffer =
std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type;
// Number of record-sized slots in each thread's in-memory buffer.
static constexpr size_t BuffLen = 1024;
// Index of the next free slot in the current thread's buffer. The logging
// functions increment it after writing a record and reset it to 0 after the
// buffered records have been written out to the log file.
thread_local size_t Offset = 0;
// Returns a reference to the calling thread's record buffer, an array of
// BuffLen record-sized slots. The array is a function-local thread_local, so
// each thread gets its own zero-initialized storage.
Buffer (&getThreadLocalBuffer())[BuffLen] XRAY_NEVER_INSTRUMENT {
  static thread_local Buffer PerThreadSlots[BuffLen] = {};
  return PerThreadSlots;
}
// Returns the calling thread's ID as reported by the gettid system call. The
// value is stored in a thread_local, so the system call is made only when
// that variable is first initialized for a given thread.
pid_t getTId() XRAY_NEVER_INSTRUMENT {
  thread_local const pid_t CachedTId =
      static_cast<pid_t>(syscall(SYS_gettid));
  return CachedTId;
}
// Returns the file descriptor of the log file shared by all threads. The
// descriptor comes from __xray_OpenLogFile(), which runs once when the
// function-local static is initialized; every later call returns the stored
// value. Callers treat -1 as "no log file available".
int getGlobalFd() XRAY_NEVER_INSTRUMENT {
  static const int LogFd = __xray_OpenLogFile();
  return LogFd;
}
// Per-thread flag that is true while the current thread is inside a logging
// function. The logging functions return early when they find it set, so a
// nested call on the same thread does not write a record.
// NOTE(review): the identifier is misspelled ("Recusion" instead of
// "Recursion"); renaming it requires updating every user in this file.
thread_local volatile bool RecusionGuard = false;
template <class RDTSC> template <class RDTSC>
void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type, void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT { RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
using Buffer = auto &InMemoryBuffer = getThreadLocalBuffer();
std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type; int Fd = getGlobalFd();
static constexpr size_t BuffLen = 1024;
thread_local static Buffer InMemoryBuffer[BuffLen] = {};
thread_local static size_t Offset = 0;
static int Fd = __xray_OpenLogFile();
if (Fd == -1) if (Fd == -1)
return; return;
thread_local __xray::ThreadExitFlusher Flusher( thread_local __xray::ThreadExitFlusher Flusher(
Fd, reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer), Offset); Fd, reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer), Offset);
thread_local pid_t TId = syscall(SYS_gettid);
// Use a simple recursion guard, to handle cases where we're already logging // Use a simple recursion guard, to handle cases where we're already logging
// and for one reason or another, this function gets called again in the same // and for one reason or another, this function gets called again in the same
// thread. // thread.
thread_local volatile bool RecusionGuard = false; if (RecusionGuard)
if (RecusionGuard) return; return;
RecusionGuard = true; RecusionGuard = true;
// First we get the useful data, and stuff it into the already aligned buffer // First we get the useful data, and stuff it into the already aligned buffer
@ -129,7 +147,7 @@ void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset]; auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset];
R.RecordType = RecordTypes::NORMAL; R.RecordType = RecordTypes::NORMAL;
R.TSC = ReadTSC(R.CPU); R.TSC = ReadTSC(R.CPU);
R.TId = TId; R.TId = getTId();
R.Type = Type; R.Type = Type;
R.FuncId = FuncId; R.FuncId = FuncId;
++Offset; ++Offset;
@ -144,6 +162,55 @@ void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
RecusionGuard = false; RecusionGuard = false;
} }
// Logs one function event together with its first argument.
//
// Two records are appended to the calling thread's in-memory buffer, one
// directly after the other: a normal XRayRecord (written by
// __xray_InMemoryRawLog) followed by an XRayArgPayload that carries Arg1.
//
// FuncId  - ID of the instrumented function.
// Type    - kind of event being logged.
// Arg1    - value of the function's first argument.
// ReadTSC - callable taking a `uint8_t &CPU` out-parameter and returning the
//           timestamp; forwarded unchanged to __xray_InMemoryRawLog.
//
// Nothing is logged when the log file could not be opened, or when the
// calling thread is already inside a logging function.
template <class RDTSC>
void __xray_InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type,
                                  uint64_t Arg1,
                                  RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
  auto &InMemoryBuffer = getThreadLocalBuffer();
  int Fd = getGlobalFd();
  if (Fd == -1)
    return;

  // Check the recursion guard before touching any per-thread state. A nested
  // call must not flush the buffer or reset Offset while an outer logging call
  // on this thread is still filling in a record.
  if (RecusionGuard)
    return;

  // Writes every buffered record to the log file and empties the buffer.
  auto FlushBuffer = [&]() XRAY_NEVER_INSTRUMENT {
    __sanitizer::SpinMutexLock L(&LogMutex);
    auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer);
    retryingWriteAll(Fd, reinterpret_cast<char *>(RecordBuffer),
                     reinterpret_cast<char *>(RecordBuffer + Offset));
    Offset = 0;
  };

  // First we check whether there's enough space to write the data consecutively
  // in the thread-local buffer. If not, we first flush the buffer before
  // attempting to write the two records that must be consecutive. The guard is
  // held for the duration of the flush so that a nested call on this thread
  // returns early instead of trying to take LogMutex again.
  if (Offset + 2 > BuffLen) {
    RecusionGuard = true;
    FlushBuffer();
    RecusionGuard = false;
  }

  // Then we write the "we have an argument" record. The guard has to be clear
  // here, because __xray_InMemoryRawLog performs its own guard check and would
  // otherwise skip the record.
  __xray_InMemoryRawLog(FuncId, Type, ReadTSC);

  RecusionGuard = true;

  // And from here on write the arg payload.
  __xray::XRayArgPayload R;
  R.RecordType = RecordTypes::ARG_PAYLOAD;
  R.FuncId = FuncId;
  R.TId = getTId();
  R.Arg = Arg1;
  // The slot is raw storage, so copy the record's bytes into it rather than
  // assigning through a typed reference.
  auto EntryPtr =
      &reinterpret_cast<__xray::XRayArgPayload *>(InMemoryBuffer)[Offset];
  std::memcpy(EntryPtr, &R, sizeof(R));
  ++Offset;

  // Write the buffer out as soon as it is full.
  if (Offset == BuffLen)
    FlushBuffer();

  RecusionGuard = false;
}
void __xray_InMemoryRawLogRealTSC(int32_t FuncId, void __xray_InMemoryRawLogRealTSC(int32_t FuncId,
XRayEntryType Type) XRAY_NEVER_INSTRUMENT { XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
__xray_InMemoryRawLog(FuncId, Type, __xray::readTSC); __xray_InMemoryRawLog(FuncId, Type, __xray::readTSC);
@ -163,13 +230,38 @@ void __xray_InMemoryEmulateTSC(int32_t FuncId,
}); });
} }
// Arg1 logging handler that takes timestamps from __xray::readTSC. It forwards
// FuncId, Type and Arg1 unchanged to __xray_InMemoryRawLogWithArg. The
// initializer at the end of this file installs it via __xray_set_handler_arg1
// when probeRequiredCPUFeatures() returns true and naive logging is enabled.
void __xray_InMemoryRawLogWithArgRealTSC(int32_t FuncId, XRayEntryType Type,
uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
__xray_InMemoryRawLogWithArg(FuncId, Type, Arg1, __xray::readTSC);
}
// Arg1 logging handler that derives its timestamp from the wall clock instead
// of reading the TSC. It forwards FuncId, Type and Arg1 to
// __xray_InMemoryRawLogWithArg together with a timestamp callback that:
//   - reads CLOCK_REALTIME and converts it to nanoseconds,
//   - reports a diagnostic and falls back to a timestamp of 0 if the clock
//     cannot be read,
//   - always reports CPU 0.
void __xray_InMemoryRawLogWithArgEmulateTSC(
    int32_t FuncId, XRayEntryType Type, uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
  __xray_InMemoryRawLogWithArg(
      FuncId, Type, Arg1, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
        timespec TS;
        int result = clock_gettime(CLOCK_REALTIME, &TS);
        if (result != 0) {
          // Name the failing call correctly and end the line, as the other
          // Report() call in this file does.
          Report("clock_gettime(2) return %d, errno=%d.\n", result,
                 int(errno));
          TS = {0, 0};
        }
        CPU = 0;
        return TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
      });
}
static auto UNUSED Unused = [] { static auto UNUSED Unused = [] {
auto UseRealTSC = probeRequiredCPUFeatures(); auto UseRealTSC = probeRequiredCPUFeatures();
if (!UseRealTSC) if (!UseRealTSC)
Report("WARNING: Required CPU features missing for XRay instrumentation, " Report("WARNING: Required CPU features missing for XRay instrumentation, "
"using emulation instead.\n"); "using emulation instead.\n");
if (flags()->xray_naive_log) if (flags()->xray_naive_log) {
__xray_set_handler_arg1(UseRealTSC
? __xray_InMemoryRawLogWithArgRealTSC
: __xray_InMemoryRawLogWithArgEmulateTSC);
__xray_set_handler(UseRealTSC ? __xray_InMemoryRawLogRealTSC __xray_set_handler(UseRealTSC ? __xray_InMemoryRawLogRealTSC
: __xray_InMemoryEmulateTSC); : __xray_InMemoryEmulateTSC);
}
return true; return true;
}(); }();