forked from OSchip/llvm-project
[XRay][compiler-rt] Write out arg1 payload in naive mode logging
Summary:
This change allows the XRay basic (naive) mode logging implementation to start writing the payload entries through the arg1 logging handler. This implementation writes out the records that the llvm-xray tool and the trace reader library will start processing in D38550.

This introduces a new payload record type which logs the data through the in-memory buffer. It uses the same size and alignment that the normal XRay record entries use. We use a new record type to indicate these new entries, so that the trace reader library in LLVM can start reading these entries.

Depends on D38550.

Reviewers: pelikan
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D38551
llvm-svn: 314968
This commit is contained in:
parent
0a465d7a01
commit
8dcba551d9
|
@ -67,13 +67,14 @@ static_assert(sizeof(XRayFileHeader) == 32, "XRayFileHeader != 32 bytes");
|
||||||
|
|
||||||
enum RecordTypes {
|
enum RecordTypes {
|
||||||
NORMAL = 0,
|
NORMAL = 0,
|
||||||
|
ARG_PAYLOAD = 1,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct alignas(32) XRayRecord {
|
struct alignas(32) XRayRecord {
|
||||||
// This is the type of the record being written. We use 16 bits to allow us to
|
// This is the type of the record being written. We use 16 bits to allow us to
|
||||||
// treat this as a discriminant, and so that the first 4 bytes get packed
|
// treat this as a discriminant, and so that the first 4 bytes get packed
|
||||||
// properly. See RecordTypes for more supported types.
|
// properly. See RecordTypes for more supported types.
|
||||||
uint16_t RecordType = 0;
|
uint16_t RecordType = RecordTypes::NORMAL;
|
||||||
|
|
||||||
// The CPU where the thread is running. We assume number of CPUs <= 256.
|
// The CPU where the thread is running. We assume number of CPUs <= 256.
|
||||||
uint8_t CPU = 0;
|
uint8_t CPU = 0;
|
||||||
|
@ -82,6 +83,7 @@ struct alignas(32) XRayRecord {
|
||||||
// ENTER = 0
|
// ENTER = 0
|
||||||
// EXIT = 1
|
// EXIT = 1
|
||||||
// TAIL_EXIT = 2
|
// TAIL_EXIT = 2
|
||||||
|
// ENTER_ARG = 3
|
||||||
uint8_t Type = 0;
|
uint8_t Type = 0;
|
||||||
|
|
||||||
// The function ID for the record.
|
// The function ID for the record.
|
||||||
|
@ -99,6 +101,32 @@ struct alignas(32) XRayRecord {
|
||||||
|
|
||||||
static_assert(sizeof(XRayRecord) == 32, "XRayRecord != 32 bytes");
|
static_assert(sizeof(XRayRecord) == 32, "XRayRecord != 32 bytes");
|
||||||
|
|
||||||
|
struct alignas(32) XRayArgPayload {
|
||||||
|
// We use the same 16 bits as a discriminant for the records in the log here
|
||||||
|
// too, and so that the first 4 bytes are packed properly.
|
||||||
|
uint16_t RecordType = RecordTypes::ARG_PAYLOAD;
|
||||||
|
|
||||||
|
// Add a few bytes to pad.
|
||||||
|
uint8_t Padding[2] = {};
|
||||||
|
|
||||||
|
// The function ID for the record.
|
||||||
|
int32_t FuncId = 0;
|
||||||
|
|
||||||
|
// The thread ID for the currently running thread.
|
||||||
|
uint32_t TId = 0;
|
||||||
|
|
||||||
|
// Add more padding.
|
||||||
|
uint8_t Padding2[4] = {};
|
||||||
|
|
||||||
|
// The argument payload.
|
||||||
|
uint64_t Arg = 0;
|
||||||
|
|
||||||
|
// The rest of this record ought to be left as padding.
|
||||||
|
uint8_t TailPadding[8] = {};
|
||||||
|
} __attribute__((packed));
|
||||||
|
|
||||||
|
static_assert(sizeof(XRayArgPayload) == 32, "XRayArgPayload != 32 bytes");
|
||||||
|
|
||||||
} // namespace __xray
|
} // namespace __xray
|
||||||
|
|
||||||
#endif // XRAY_XRAY_RECORDS_H
|
#endif // XRAY_XRAY_RECORDS_H
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
#include <cstring>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
|
@ -82,14 +83,14 @@ static int __xray_OpenLogFile() XRAY_NEVER_INSTRUMENT {
|
||||||
|
|
||||||
// Test for required CPU features and cache the cycle frequency
|
// Test for required CPU features and cache the cycle frequency
|
||||||
static bool TSCSupported = probeRequiredCPUFeatures();
|
static bool TSCSupported = probeRequiredCPUFeatures();
|
||||||
static uint64_t CycleFrequency = TSCSupported ? getTSCFrequency()
|
static uint64_t CycleFrequency =
|
||||||
: __xray::NanosecondsPerSecond;
|
TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond;
|
||||||
|
|
||||||
// Since we're here, we get to write the header. We set it up so that the
|
// Since we're here, we get to write the header. We set it up so that the
|
||||||
// header will only be written once, at the start, and let the threads
|
// header will only be written once, at the start, and let the threads
|
||||||
// logging do writes which just append.
|
// logging do writes which just append.
|
||||||
XRayFileHeader Header;
|
XRayFileHeader Header;
|
||||||
Header.Version = 2; // Version 2 includes tail exit records.
|
Header.Version = 2; // Version 2 includes tail exit records.
|
||||||
Header.Type = FileTypes::NAIVE_LOG;
|
Header.Type = FileTypes::NAIVE_LOG;
|
||||||
Header.CycleFrequency = CycleFrequency;
|
Header.CycleFrequency = CycleFrequency;
|
||||||
|
|
||||||
|
@ -102,26 +103,43 @@ static int __xray_OpenLogFile() XRAY_NEVER_INSTRUMENT {
|
||||||
return F;
|
return F;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
using Buffer =
|
||||||
|
std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type;
|
||||||
|
|
||||||
|
static constexpr size_t BuffLen = 1024;
|
||||||
|
thread_local size_t Offset = 0;
|
||||||
|
|
||||||
|
// Returns a reference to the calling thread's array of BuffLen entries. The
// array is a value-initialized, function-local thread_local object.
Buffer (&getThreadLocalBuffer())[BuffLen] XRAY_NEVER_INSTRUMENT {
  static thread_local Buffer PerThreadStorage[BuffLen]{};
  return PerThreadStorage;
}
|
||||||
|
|
||||||
|
// Returns the calling thread's ID. The gettid system call is issued only the
// first time a given thread calls this; the result is cached per thread.
pid_t getTId() XRAY_NEVER_INSTRUMENT {
  static thread_local pid_t CachedThreadId = syscall(SYS_gettid);
  return CachedThreadId;
}
|
||||||
|
|
||||||
|
// Returns the file descriptor of the log file shared by all threads. The file
// is opened through __xray_OpenLogFile() on the first call only; callers
// treat -1 as "no log file available".
int getGlobalFd() XRAY_NEVER_INSTRUMENT {
  static int LogFd = __xray_OpenLogFile();
  return LogFd;
}
|
||||||
|
|
||||||
|
// Per-thread flag that is set while a logging handler is writing into the
// thread's buffer. The handlers return early when they find it already set,
// so a call that re-enters logging on the same thread does nothing.
thread_local volatile bool RecusionGuard = false;
|
||||||
template <class RDTSC>
|
template <class RDTSC>
|
||||||
void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
|
void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
|
||||||
RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
|
RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
|
||||||
using Buffer =
|
auto &InMemoryBuffer = getThreadLocalBuffer();
|
||||||
std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type;
|
int Fd = getGlobalFd();
|
||||||
static constexpr size_t BuffLen = 1024;
|
|
||||||
thread_local static Buffer InMemoryBuffer[BuffLen] = {};
|
|
||||||
thread_local static size_t Offset = 0;
|
|
||||||
static int Fd = __xray_OpenLogFile();
|
|
||||||
if (Fd == -1)
|
if (Fd == -1)
|
||||||
return;
|
return;
|
||||||
thread_local __xray::ThreadExitFlusher Flusher(
|
thread_local __xray::ThreadExitFlusher Flusher(
|
||||||
Fd, reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer), Offset);
|
Fd, reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer), Offset);
|
||||||
thread_local pid_t TId = syscall(SYS_gettid);
|
|
||||||
|
|
||||||
// Use a simple recursion guard, to handle cases where we're already logging
|
// Use a simple recursion guard, to handle cases where we're already logging
|
||||||
// and for one reason or another, this function gets called again in the same
|
// and for one reason or another, this function gets called again in the same
|
||||||
// thread.
|
// thread.
|
||||||
thread_local volatile bool RecusionGuard = false;
|
if (RecusionGuard)
|
||||||
if (RecusionGuard) return;
|
return;
|
||||||
RecusionGuard = true;
|
RecusionGuard = true;
|
||||||
|
|
||||||
// First we get the useful data, and stuff it into the already aligned buffer
|
// First we get the useful data, and stuff it into the already aligned buffer
|
||||||
|
@ -129,7 +147,7 @@ void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
|
||||||
auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset];
|
auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset];
|
||||||
R.RecordType = RecordTypes::NORMAL;
|
R.RecordType = RecordTypes::NORMAL;
|
||||||
R.TSC = ReadTSC(R.CPU);
|
R.TSC = ReadTSC(R.CPU);
|
||||||
R.TId = TId;
|
R.TId = getTId();
|
||||||
R.Type = Type;
|
R.Type = Type;
|
||||||
R.FuncId = FuncId;
|
R.FuncId = FuncId;
|
||||||
++Offset;
|
++Offset;
|
||||||
|
@ -144,6 +162,55 @@ void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
|
||||||
RecusionGuard = false;
|
RecusionGuard = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class RDTSC>
|
||||||
|
void __xray_InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type,
|
||||||
|
uint64_t Arg1,
|
||||||
|
RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
|
||||||
|
auto &InMemoryBuffer = getThreadLocalBuffer();
|
||||||
|
int Fd = getGlobalFd();
|
||||||
|
if (Fd == -1)
|
||||||
|
return;
|
||||||
|
|
||||||
|
// First we check whether there's enough space to write the data consecutively
|
||||||
|
// in the thread-local buffer. If not, we first flush the buffer before
|
||||||
|
// attempting to write the two records that must be consecutive.
|
||||||
|
if (Offset + 2 > BuffLen) {
|
||||||
|
__sanitizer::SpinMutexLock L(&LogMutex);
|
||||||
|
auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer);
|
||||||
|
retryingWriteAll(Fd, reinterpret_cast<char *>(RecordBuffer),
|
||||||
|
reinterpret_cast<char *>(RecordBuffer + Offset));
|
||||||
|
Offset = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Then we write the "we have an argument" record.
|
||||||
|
__xray_InMemoryRawLog(FuncId, Type, ReadTSC);
|
||||||
|
|
||||||
|
if (RecusionGuard)
|
||||||
|
return;
|
||||||
|
|
||||||
|
RecusionGuard = true;
|
||||||
|
|
||||||
|
// And from here on write the arg payload.
|
||||||
|
__xray::XRayArgPayload R;
|
||||||
|
R.RecordType = RecordTypes::ARG_PAYLOAD;
|
||||||
|
R.FuncId = FuncId;
|
||||||
|
R.TId = getTId();
|
||||||
|
R.Arg = Arg1;
|
||||||
|
auto EntryPtr =
|
||||||
|
&reinterpret_cast<__xray::XRayArgPayload *>(&InMemoryBuffer)[Offset];
|
||||||
|
std::memcpy(EntryPtr, &R, sizeof(R));
|
||||||
|
++Offset;
|
||||||
|
if (Offset == BuffLen) {
|
||||||
|
__sanitizer::SpinMutexLock L(&LogMutex);
|
||||||
|
auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer);
|
||||||
|
retryingWriteAll(Fd, reinterpret_cast<char *>(RecordBuffer),
|
||||||
|
reinterpret_cast<char *>(RecordBuffer + Offset));
|
||||||
|
Offset = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
RecusionGuard = false;
|
||||||
|
}
|
||||||
|
|
||||||
void __xray_InMemoryRawLogRealTSC(int32_t FuncId,
|
void __xray_InMemoryRawLogRealTSC(int32_t FuncId,
|
||||||
XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
|
XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
|
||||||
__xray_InMemoryRawLog(FuncId, Type, __xray::readTSC);
|
__xray_InMemoryRawLog(FuncId, Type, __xray::readTSC);
|
||||||
|
@ -163,13 +230,38 @@ void __xray_InMemoryEmulateTSC(int32_t FuncId,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// arg1 logging handler installed when the required CPU features are present:
// forwards to the shared implementation with __xray::readTSC as the timestamp
// source.
void __xray_InMemoryRawLogWithArgRealTSC(int32_t FuncId, XRayEntryType Type,
                                         uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
  return __xray_InMemoryRawLogWithArg(FuncId, Type, Arg1, __xray::readTSC);
}
|
||||||
|
|
||||||
|
void __xray_InMemoryRawLogWithArgEmulateTSC(
|
||||||
|
int32_t FuncId, XRayEntryType Type, uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
|
||||||
|
__xray_InMemoryRawLogWithArg(
|
||||||
|
FuncId, Type, Arg1, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
|
||||||
|
timespec TS;
|
||||||
|
int result = clock_gettime(CLOCK_REALTIME, &TS);
|
||||||
|
if (result != 0) {
|
||||||
|
Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno));
|
||||||
|
TS = {0, 0};
|
||||||
|
}
|
||||||
|
CPU = 0;
|
||||||
|
return TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
static auto UNUSED Unused = [] {
|
static auto UNUSED Unused = [] {
|
||||||
auto UseRealTSC = probeRequiredCPUFeatures();
|
auto UseRealTSC = probeRequiredCPUFeatures();
|
||||||
if (!UseRealTSC)
|
if (!UseRealTSC)
|
||||||
Report("WARNING: Required CPU features missing for XRay instrumentation, "
|
Report("WARNING: Required CPU features missing for XRay instrumentation, "
|
||||||
"using emulation instead.\n");
|
"using emulation instead.\n");
|
||||||
if (flags()->xray_naive_log)
|
if (flags()->xray_naive_log) {
|
||||||
|
__xray_set_handler_arg1(UseRealTSC
|
||||||
|
? __xray_InMemoryRawLogWithArgRealTSC
|
||||||
|
: __xray_InMemoryRawLogWithArgEmulateTSC);
|
||||||
__xray_set_handler(UseRealTSC ? __xray_InMemoryRawLogRealTSC
|
__xray_set_handler(UseRealTSC ? __xray_InMemoryRawLogRealTSC
|
||||||
: __xray_InMemoryEmulateTSC);
|
: __xray_InMemoryEmulateTSC);
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}();
|
}();
|
||||||
|
|
Loading…
Reference in New Issue