forked from OSchip/llvm-project
[XRay][compiler-rt] Write out arg1 payload in naive mode logging
Summary: This change allows the XRay basic (naive) mode logging implementation to start writing the payload entries through the arg1 logging handler. This implementation writes out the records that the llvm-xray tool and the trace reader library will start processing in D38550. This introduces a new payload record type which logs the data through the in-memory buffer. It uses the same size/alignment that the normal XRay record entries use. We use a new record type to indicate these new entries, so that the trace reader library in LLVM can start reading these entries. Depends on D38550. Reviewers: pelikan Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D38551 llvm-svn: 314968
This commit is contained in:
parent
0a465d7a01
commit
8dcba551d9
|
@ -67,13 +67,14 @@ static_assert(sizeof(XRayFileHeader) == 32, "XRayFileHeader != 32 bytes");
|
|||
|
||||
// Discriminant stored in the 16-bit RecordType field at the start of every
// fixed-size log entry. The underlying type is pinned to uint16_t so that an
// enumerator can never outgrow the field it is written into.
enum RecordTypes : uint16_t {
  // Regular function-event entry; the default RecordType of XRayRecord.
  NORMAL = 0,
  // Argument payload entry; the default RecordType of XRayArgPayload.
  ARG_PAYLOAD = 1,
};
|
||||
|
||||
struct alignas(32) XRayRecord {
|
||||
// This is the type of the record being written. We use 16 bits to allow us to
|
||||
// treat this as a discriminant, and so that the first 4 bytes get packed
|
||||
// properly. See RecordTypes for more supported types.
|
||||
uint16_t RecordType = 0;
|
||||
uint16_t RecordType = RecordTypes::NORMAL;
|
||||
|
||||
// The CPU where the thread is running. We assume number of CPUs <= 256.
|
||||
uint8_t CPU = 0;
|
||||
|
@ -82,6 +83,7 @@ struct alignas(32) XRayRecord {
|
|||
// ENTER = 0
|
||||
// EXIT = 1
|
||||
// TAIL_EXIT = 2
|
||||
// ENTER_ARG = 3
|
||||
uint8_t Type = 0;
|
||||
|
||||
// The function ID for the record.
|
||||
|
@ -99,6 +101,32 @@ struct alignas(32) XRayRecord {
|
|||
|
||||
static_assert(sizeof(XRayRecord) == 32, "XRayRecord != 32 bytes");
|
||||
|
||||
struct alignas(32) XRayArgPayload {
|
||||
// We use the same 16 bits as a discriminant for the records in the log here
|
||||
// too, and so that the first 4 bytes are packed properly.
|
||||
uint16_t RecordType = RecordTypes::ARG_PAYLOAD;
|
||||
|
||||
// Add a few bytes to pad.
|
||||
uint8_t Padding[2] = {};
|
||||
|
||||
// The function ID for the record.
|
||||
int32_t FuncId = 0;
|
||||
|
||||
// The thread ID for the currently running thread.
|
||||
uint32_t TId = 0;
|
||||
|
||||
// Add more padding.
|
||||
uint8_t Padding2[4] = {};
|
||||
|
||||
// The argument payload.
|
||||
uint64_t Arg = 0;
|
||||
|
||||
// The rest of this record ought to be left as padding.
|
||||
uint8_t TailPadding[8] = {};
|
||||
} __attribute__((packed));
|
||||
|
||||
static_assert(sizeof(XRayArgPayload) == 32, "XRayArgPayload != 32 bytes");
|
||||
|
||||
} // namespace __xray
|
||||
|
||||
#endif // XRAY_XRAY_RECORDS_H
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
|
@ -82,14 +83,14 @@ static int __xray_OpenLogFile() XRAY_NEVER_INSTRUMENT {
|
|||
|
||||
// Test for required CPU features and cache the cycle frequency
|
||||
static bool TSCSupported = probeRequiredCPUFeatures();
|
||||
static uint64_t CycleFrequency = TSCSupported ? getTSCFrequency()
|
||||
: __xray::NanosecondsPerSecond;
|
||||
static uint64_t CycleFrequency =
|
||||
TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond;
|
||||
|
||||
// Since we're here, we get to write the header. We set it up so that the
|
||||
// header will only be written once, at the start, and let the threads
|
||||
// logging do writes which just append.
|
||||
XRayFileHeader Header;
|
||||
Header.Version = 2; // Version 2 includes tail exit records.
|
||||
Header.Version = 2; // Version 2 includes tail exit records.
|
||||
Header.Type = FileTypes::NAIVE_LOG;
|
||||
Header.CycleFrequency = CycleFrequency;
|
||||
|
||||
|
@ -102,26 +103,43 @@ static int __xray_OpenLogFile() XRAY_NEVER_INSTRUMENT {
|
|||
return F;
|
||||
}
|
||||
|
||||
using Buffer =
|
||||
std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type;
|
||||
|
||||
static constexpr size_t BuffLen = 1024;
|
||||
thread_local size_t Offset = 0;
|
||||
|
||||
// Returns a reference to the calling thread's array of BuffLen Buffer slots.
// The array is thread-local and value-initialized.
Buffer (&getThreadLocalBuffer())[BuffLen] XRAY_NEVER_INSTRUMENT {
  static thread_local Buffer ThreadBuffer[BuffLen] = {};
  return ThreadBuffer;
}
|
||||
|
||||
// Returns the result of the gettid syscall for the calling thread. The syscall
// is issued once per thread; the value is cached in a thread-local afterwards.
pid_t getTId() XRAY_NEVER_INSTRUMENT {
  static thread_local pid_t CachedTId = syscall(SYS_gettid);
  return CachedTId;
}
|
||||
|
||||
// Returns the value produced by __xray_OpenLogFile(). That function is invoked
// only on the first call; the result is cached for the rest of the process.
// Callers treat -1 as "no log file available".
int getGlobalFd() XRAY_NEVER_INSTRUMENT {
  static int LogFd = __xray_OpenLogFile();
  return LogFd;
}
|
||||
|
||||
// Per-thread re-entrancy flag for the logging handlers: a handler returns
// early when the flag is already set, sets it while it writes an entry, and
// clears it again once it is done.
thread_local volatile bool RecusionGuard = false;
|
||||
template <class RDTSC>
|
||||
void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
|
||||
RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
|
||||
using Buffer =
|
||||
std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type;
|
||||
static constexpr size_t BuffLen = 1024;
|
||||
thread_local static Buffer InMemoryBuffer[BuffLen] = {};
|
||||
thread_local static size_t Offset = 0;
|
||||
static int Fd = __xray_OpenLogFile();
|
||||
auto &InMemoryBuffer = getThreadLocalBuffer();
|
||||
int Fd = getGlobalFd();
|
||||
if (Fd == -1)
|
||||
return;
|
||||
thread_local __xray::ThreadExitFlusher Flusher(
|
||||
Fd, reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer), Offset);
|
||||
thread_local pid_t TId = syscall(SYS_gettid);
|
||||
|
||||
// Use a simple recursion guard, to handle cases where we're already logging
|
||||
// and for one reason or another, this function gets called again in the same
|
||||
// thread.
|
||||
thread_local volatile bool RecusionGuard = false;
|
||||
if (RecusionGuard) return;
|
||||
if (RecusionGuard)
|
||||
return;
|
||||
RecusionGuard = true;
|
||||
|
||||
// First we get the useful data, and stuff it into the already aligned buffer
|
||||
|
@ -129,7 +147,7 @@ void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
|
|||
auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset];
|
||||
R.RecordType = RecordTypes::NORMAL;
|
||||
R.TSC = ReadTSC(R.CPU);
|
||||
R.TId = TId;
|
||||
R.TId = getTId();
|
||||
R.Type = Type;
|
||||
R.FuncId = FuncId;
|
||||
++Offset;
|
||||
|
@ -144,6 +162,55 @@ void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
|
|||
RecusionGuard = false;
|
||||
}
|
||||
|
||||
template <class RDTSC>
|
||||
void __xray_InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type,
|
||||
uint64_t Arg1,
|
||||
RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
|
||||
auto &InMemoryBuffer = getThreadLocalBuffer();
|
||||
int Fd = getGlobalFd();
|
||||
if (Fd == -1)
|
||||
return;
|
||||
|
||||
// First we check whether there's enough space to write the data consecutively
|
||||
// in the thread-local buffer. If not, we first flush the buffer before
|
||||
// attempting to write the two records that must be consecutive.
|
||||
if (Offset + 2 > BuffLen) {
|
||||
__sanitizer::SpinMutexLock L(&LogMutex);
|
||||
auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer);
|
||||
retryingWriteAll(Fd, reinterpret_cast<char *>(RecordBuffer),
|
||||
reinterpret_cast<char *>(RecordBuffer + Offset));
|
||||
Offset = 0;
|
||||
}
|
||||
|
||||
// Then we write the "we have an argument" record.
|
||||
__xray_InMemoryRawLog(FuncId, Type, ReadTSC);
|
||||
|
||||
if (RecusionGuard)
|
||||
return;
|
||||
|
||||
RecusionGuard = true;
|
||||
|
||||
// And from here on write the arg payload.
|
||||
__xray::XRayArgPayload R;
|
||||
R.RecordType = RecordTypes::ARG_PAYLOAD;
|
||||
R.FuncId = FuncId;
|
||||
R.TId = getTId();
|
||||
R.Arg = Arg1;
|
||||
auto EntryPtr =
|
||||
&reinterpret_cast<__xray::XRayArgPayload *>(&InMemoryBuffer)[Offset];
|
||||
std::memcpy(EntryPtr, &R, sizeof(R));
|
||||
++Offset;
|
||||
if (Offset == BuffLen) {
|
||||
__sanitizer::SpinMutexLock L(&LogMutex);
|
||||
auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer);
|
||||
retryingWriteAll(Fd, reinterpret_cast<char *>(RecordBuffer),
|
||||
reinterpret_cast<char *>(RecordBuffer + Offset));
|
||||
Offset = 0;
|
||||
}
|
||||
|
||||
RecusionGuard = false;
|
||||
}
|
||||
|
||||
void __xray_InMemoryRawLogRealTSC(int32_t FuncId,
|
||||
XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
|
||||
__xray_InMemoryRawLog(FuncId, Type, __xray::readTSC);
|
||||
|
@ -163,13 +230,38 @@ void __xray_InMemoryEmulateTSC(int32_t FuncId,
|
|||
});
|
||||
}
|
||||
|
||||
// Arg1 logging handler that timestamps entries with __xray::readTSC. It only
// forwards its arguments to __xray_InMemoryRawLogWithArg.
void __xray_InMemoryRawLogWithArgRealTSC(int32_t FuncId, XRayEntryType Type,
                                         uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
  __xray_InMemoryRawLogWithArg(FuncId, Type, Arg1, __xray::readTSC);
}
|
||||
|
||||
void __xray_InMemoryRawLogWithArgEmulateTSC(
|
||||
int32_t FuncId, XRayEntryType Type, uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
|
||||
__xray_InMemoryRawLogWithArg(
|
||||
FuncId, Type, Arg1, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
|
||||
timespec TS;
|
||||
int result = clock_gettime(CLOCK_REALTIME, &TS);
|
||||
if (result != 0) {
|
||||
Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno));
|
||||
TS = {0, 0};
|
||||
}
|
||||
CPU = 0;
|
||||
return TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
|
||||
});
|
||||
}
|
||||
|
||||
// Runs once during static initialization. It probes the CPU features XRay
// needs, warns when they are missing, and, if the xray_naive_log flag is set,
// installs the naive-mode handlers: the real-TSC variants when the probe
// succeeded, the emulated-TSC variants otherwise.
static auto UNUSED Unused = [] {
  auto UseRealTSC = probeRequiredCPUFeatures();
  if (!UseRealTSC)
    Report("WARNING: Required CPU features missing for XRay instrumentation, "
           "using emulation instead.\n");

  if (flags()->xray_naive_log) {
    // Handler for functions instrumented with first-argument logging.
    __xray_set_handler_arg1(UseRealTSC
                                ? __xray_InMemoryRawLogWithArgRealTSC
                                : __xray_InMemoryRawLogWithArgEmulateTSC);
    // Handler for all other instrumented function events.
    __xray_set_handler(UseRealTSC ? __xray_InMemoryRawLogRealTSC
                                  : __xray_InMemoryEmulateTSC);
  }

  return true;
}();
|
||||
|
|
Loading…
Reference in New Issue