forked from OSchip/llvm-project
[XRay][compiler-rt] Remove more STL dependencies from FDR mode
Summary: This change removes dependencies on STL types: - std::aligned_storage -- we're using manually-aligned character buffers instead for metadata and function records. - std::tuple -- use a plain old struct instead. This is an incremental step in removing all STL references from the compiler-rt implementation of XRay (llvm.org/PR32274). Reviewers: dblaikie, pelikan, kpw Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D39277 llvm-svn: 316816
This commit is contained in:
parent
d0c6cf2e8c
commit
e8fec1955a
|
@ -15,15 +15,10 @@
|
||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
#include "xray_fdr_logging.h"
|
#include "xray_fdr_logging.h"
|
||||||
#include <algorithm>
|
|
||||||
#include <bitset>
|
|
||||||
#include <cerrno>
|
|
||||||
#include <cstring>
|
|
||||||
#include <sys/syscall.h>
|
#include <sys/syscall.h>
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <unordered_map>
|
|
||||||
|
|
||||||
#include "sanitizer_common/sanitizer_atomic.h"
|
#include "sanitizer_common/sanitizer_atomic.h"
|
||||||
#include "sanitizer_common/sanitizer_common.h"
|
#include "sanitizer_common/sanitizer_common.h"
|
||||||
|
@ -176,19 +171,22 @@ XRayLogInitStatus fdrLoggingReset() XRAY_NEVER_INSTRUMENT {
|
||||||
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
|
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::tuple<uint64_t, unsigned char>
|
struct TSCAndCPU {
|
||||||
getTimestamp() XRAY_NEVER_INSTRUMENT {
|
uint64_t TSC;
|
||||||
|
unsigned char CPU;
|
||||||
|
};
|
||||||
|
|
||||||
|
static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT {
|
||||||
// We want to get the TSC as early as possible, so that we can check whether
|
// We want to get the TSC as early as possible, so that we can check whether
|
||||||
// we've seen this CPU before. We also do it before we load anything else, to
|
// we've seen this CPU before. We also do it before we load anything else, to
|
||||||
// allow for forward progress with the scheduling.
|
// allow for forward progress with the scheduling.
|
||||||
unsigned char CPU;
|
TSCAndCPU Result;
|
||||||
uint64_t TSC;
|
|
||||||
|
|
||||||
// Test once for required CPU features
|
// Test once for required CPU features
|
||||||
static bool TSCSupported = probeRequiredCPUFeatures();
|
static bool TSCSupported = probeRequiredCPUFeatures();
|
||||||
|
|
||||||
if (TSCSupported) {
|
if (TSCSupported) {
|
||||||
TSC = __xray::readTSC(CPU);
|
Result.TSC = __xray::readTSC(Result.CPU);
|
||||||
} else {
|
} else {
|
||||||
// FIXME: This code needs refactoring as it appears in multiple locations
|
// FIXME: This code needs refactoring as it appears in multiple locations
|
||||||
timespec TS;
|
timespec TS;
|
||||||
|
@ -197,34 +195,32 @@ getTimestamp() XRAY_NEVER_INSTRUMENT {
|
||||||
Report("clock_gettime(2) return %d, errno=%d", result, int(errno));
|
Report("clock_gettime(2) return %d, errno=%d", result, int(errno));
|
||||||
TS = {0, 0};
|
TS = {0, 0};
|
||||||
}
|
}
|
||||||
CPU = 0;
|
Result.CPU = 0;
|
||||||
TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
|
Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
|
||||||
}
|
}
|
||||||
return std::make_tuple(TSC, CPU);
|
return Result;
|
||||||
}
|
}
|
||||||
|
|
||||||
void fdrLoggingHandleArg0(int32_t FuncId,
|
void fdrLoggingHandleArg0(int32_t FuncId,
|
||||||
XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
|
XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
|
||||||
auto TSC_CPU = getTimestamp();
|
auto TC = getTimestamp();
|
||||||
__xray_fdr_internal::processFunctionHook(FuncId, Entry, std::get<0>(TSC_CPU),
|
__xray_fdr_internal::processFunctionHook(FuncId, Entry, TC.TSC,
|
||||||
std::get<1>(TSC_CPU), 0,
|
TC.CPU, 0, clock_gettime, *BQ);
|
||||||
clock_gettime, *BQ);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry,
|
void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry,
|
||||||
uint64_t Arg) XRAY_NEVER_INSTRUMENT {
|
uint64_t Arg) XRAY_NEVER_INSTRUMENT {
|
||||||
auto TSC_CPU = getTimestamp();
|
auto TC = getTimestamp();
|
||||||
__xray_fdr_internal::processFunctionHook(FuncId, Entry, std::get<0>(TSC_CPU),
|
__xray_fdr_internal::processFunctionHook(
|
||||||
std::get<1>(TSC_CPU), Arg,
|
FuncId, Entry, TC.TSC, TC.CPU, Arg, clock_gettime, *BQ);
|
||||||
clock_gettime, *BQ);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void fdrLoggingHandleCustomEvent(void *Event,
|
void fdrLoggingHandleCustomEvent(void *Event,
|
||||||
std::size_t EventSize) XRAY_NEVER_INSTRUMENT {
|
std::size_t EventSize) XRAY_NEVER_INSTRUMENT {
|
||||||
using namespace __xray_fdr_internal;
|
using namespace __xray_fdr_internal;
|
||||||
auto TSC_CPU = getTimestamp();
|
auto TC = getTimestamp();
|
||||||
auto &TSC = std::get<0>(TSC_CPU);
|
auto &TSC = TC.TSC;
|
||||||
auto &CPU = std::get<1>(TSC_CPU);
|
auto &CPU = TC.CPU;
|
||||||
RecursionGuard Guard{Running};
|
RecursionGuard Guard{Running};
|
||||||
if (!Guard) {
|
if (!Guard) {
|
||||||
assert(Running && "RecursionGuard is buggy!");
|
assert(Running && "RecursionGuard is buggy!");
|
||||||
|
@ -261,7 +257,7 @@ void fdrLoggingHandleCustomEvent(void *Event,
|
||||||
CustomEvent.Type = uint8_t(RecordType::Metadata);
|
CustomEvent.Type = uint8_t(RecordType::Metadata);
|
||||||
CustomEvent.RecordKind =
|
CustomEvent.RecordKind =
|
||||||
uint8_t(MetadataRecord::RecordKinds::CustomEventMarker);
|
uint8_t(MetadataRecord::RecordKinds::CustomEventMarker);
|
||||||
constexpr auto TSCSize = sizeof(std::get<0>(TSC_CPU));
|
constexpr auto TSCSize = sizeof(TC.TSC);
|
||||||
std::memcpy(&CustomEvent.Data, &ReducedEventSize, sizeof(int32_t));
|
std::memcpy(&CustomEvent.Data, &ReducedEventSize, sizeof(int32_t));
|
||||||
std::memcpy(&CustomEvent.Data[sizeof(int32_t)], &TSC, TSCSize);
|
std::memcpy(&CustomEvent.Data[sizeof(int32_t)], &TSC, TSCSize);
|
||||||
std::memcpy(TLD.RecordPtr, &CustomEvent, sizeof(CustomEvent));
|
std::memcpy(TLD.RecordPtr, &CustomEvent, sizeof(CustomEvent));
|
||||||
|
|
|
@ -18,16 +18,17 @@
|
||||||
#define XRAY_XRAY_FDR_LOGGING_IMPL_H
|
#define XRAY_XRAY_FDR_LOGGING_IMPL_H
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <cstdint>
|
#include <cstddef>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <memory>
|
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#include <string>
|
|
||||||
#include <sys/syscall.h>
|
#include <sys/syscall.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
|
// FIXME: Implement analogues to std::shared_ptr and std::weak_ptr
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
#include "sanitizer_common/sanitizer_common.h"
|
#include "sanitizer_common/sanitizer_common.h"
|
||||||
#include "xray/xray_log_interface.h"
|
#include "xray/xray_log_interface.h"
|
||||||
#include "xray_buffer_queue.h"
|
#include "xray_buffer_queue.h"
|
||||||
|
@ -96,7 +97,7 @@ static void writeTSCWrapMetadata(uint64_t TSC);
|
||||||
// call so that it can be initialized on first use instead of as a global. We
|
// call so that it can be initialized on first use instead of as a global. We
|
||||||
// force the alignment to 64-bytes for x86 cache line alignment, as this
|
// force the alignment to 64-bytes for x86 cache line alignment, as this
|
||||||
// structure is used in the hot path of implementation.
|
// structure is used in the hot path of implementation.
|
||||||
struct ALIGNED(64) ThreadLocalData {
|
struct alignas(64) ThreadLocalData {
|
||||||
BufferQueue::Buffer Buffer;
|
BufferQueue::Buffer Buffer;
|
||||||
char *RecordPtr = nullptr;
|
char *RecordPtr = nullptr;
|
||||||
// The number of FunctionEntry records immediately preceding RecordPtr.
|
// The number of FunctionEntry records immediately preceding RecordPtr.
|
||||||
|
@ -176,8 +177,8 @@ static ThreadLocalData &getThreadLocalData() {
|
||||||
// We need aligned, uninitialized storage for the TLS object which is
|
// We need aligned, uninitialized storage for the TLS object which is
|
||||||
// trivially destructible. We're going to use this as raw storage and
|
// trivially destructible. We're going to use this as raw storage and
|
||||||
// placement-new the ThreadLocalData object into it later.
|
// placement-new the ThreadLocalData object into it later.
|
||||||
thread_local std::aligned_storage<sizeof(ThreadLocalData),
|
alignas(alignof(ThreadLocalData)) thread_local unsigned char
|
||||||
alignof(ThreadLocalData)>::type TLSBuffer;
|
TLSBuffer[sizeof(ThreadLocalData)];
|
||||||
|
|
||||||
// Ensure that we only actually ever do the pthread initialization once.
|
// Ensure that we only actually ever do the pthread initialization once.
|
||||||
thread_local bool UNUSED Unused = [] {
|
thread_local bool UNUSED Unused = [] {
|
||||||
|
@ -215,7 +216,7 @@ static ThreadLocalData &getThreadLocalData() {
|
||||||
return true;
|
return true;
|
||||||
}();
|
}();
|
||||||
|
|
||||||
return *reinterpret_cast<ThreadLocalData *>(&TLSBuffer);
|
return *reinterpret_cast<ThreadLocalData *>(TLSBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
//-----------------------------------------------------------------------------|
|
//-----------------------------------------------------------------------------|
|
||||||
|
@ -255,14 +256,15 @@ public:
|
||||||
inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
|
inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
|
||||||
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
||||||
static constexpr int InitRecordsCount = 2;
|
static constexpr int InitRecordsCount = 2;
|
||||||
std::aligned_storage<sizeof(MetadataRecord)>::type Records[InitRecordsCount];
|
alignas(alignof(MetadataRecord)) unsigned char
|
||||||
|
Records[InitRecordsCount * MetadataRecSize];
|
||||||
{
|
{
|
||||||
// Write out a MetadataRecord to signify that this is the start of a new
|
// Write out a MetadataRecord to signify that this is the start of a new
|
||||||
// buffer, associated with a particular thread, with a new CPU. For the
|
// buffer, associated with a particular thread, with a new CPU. For the
|
||||||
// data, we have 15 bytes to squeeze as much information as we can. At this
|
// data, we have 15 bytes to squeeze as much information as we can. At this
|
||||||
// point we only write down the following bytes:
|
// point we only write down the following bytes:
|
||||||
// - Thread ID (pid_t, 4 bytes)
|
// - Thread ID (pid_t, 4 bytes)
|
||||||
auto &NewBuffer = *reinterpret_cast<MetadataRecord *>(&Records[0]);
|
auto &NewBuffer = *reinterpret_cast<MetadataRecord *>(Records);
|
||||||
NewBuffer.Type = uint8_t(RecordType::Metadata);
|
NewBuffer.Type = uint8_t(RecordType::Metadata);
|
||||||
NewBuffer.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewBuffer);
|
NewBuffer.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewBuffer);
|
||||||
std::memcpy(&NewBuffer.Data, &Tid, sizeof(pid_t));
|
std::memcpy(&NewBuffer.Data, &Tid, sizeof(pid_t));
|
||||||
|
@ -270,7 +272,8 @@ inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
|
||||||
// Also write the WalltimeMarker record.
|
// Also write the WalltimeMarker record.
|
||||||
{
|
{
|
||||||
static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes");
|
static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes");
|
||||||
auto &WalltimeMarker = *reinterpret_cast<MetadataRecord *>(&Records[1]);
|
auto &WalltimeMarker =
|
||||||
|
*reinterpret_cast<MetadataRecord *>(Records + MetadataRecSize);
|
||||||
WalltimeMarker.Type = uint8_t(RecordType::Metadata);
|
WalltimeMarker.Type = uint8_t(RecordType::Metadata);
|
||||||
WalltimeMarker.RecordKind =
|
WalltimeMarker.RecordKind =
|
||||||
uint8_t(MetadataRecord::RecordKinds::WalltimeMarker);
|
uint8_t(MetadataRecord::RecordKinds::WalltimeMarker);
|
||||||
|
@ -382,10 +385,7 @@ static inline void writeCallArgumentMetadata(uint64_t A) XRAY_NEVER_INSTRUMENT {
|
||||||
static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
|
static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
|
||||||
XRayEntryType EntryType,
|
XRayEntryType EntryType,
|
||||||
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
||||||
std::aligned_storage<sizeof(FunctionRecord), alignof(FunctionRecord)>::type
|
FunctionRecord FuncRecord;
|
||||||
AlignedFuncRecordBuffer;
|
|
||||||
auto &FuncRecord =
|
|
||||||
*reinterpret_cast<FunctionRecord *>(&AlignedFuncRecordBuffer);
|
|
||||||
FuncRecord.Type = uint8_t(RecordType::Function);
|
FuncRecord.Type = uint8_t(RecordType::Function);
|
||||||
// Only take 28 bits of the function id.
|
// Only take 28 bits of the function id.
|
||||||
FuncRecord.FuncId = FuncId & ~(0x0F << 28);
|
FuncRecord.FuncId = FuncId & ~(0x0F << 28);
|
||||||
|
@ -439,7 +439,7 @@ static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::memcpy(MemPtr, &AlignedFuncRecordBuffer, sizeof(FunctionRecord));
|
std::memcpy(MemPtr, &FuncRecord, sizeof(FunctionRecord));
|
||||||
MemPtr += sizeof(FunctionRecord);
|
MemPtr += sizeof(FunctionRecord);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -456,14 +456,10 @@ static uint64_t thresholdTicks() {
|
||||||
// "Function Entry" record and any "Tail Call Exit" records after that.
|
// "Function Entry" record and any "Tail Call Exit" records after that.
|
||||||
static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
|
static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
|
||||||
uint64_t &LastFunctionEntryTSC, int32_t FuncId) {
|
uint64_t &LastFunctionEntryTSC, int32_t FuncId) {
|
||||||
using AlignedFuncStorage =
|
|
||||||
std::aligned_storage<sizeof(FunctionRecord),
|
|
||||||
alignof(FunctionRecord)>::type;
|
|
||||||
auto &TLD = getThreadLocalData();
|
auto &TLD = getThreadLocalData();
|
||||||
TLD.RecordPtr -= FunctionRecSize;
|
TLD.RecordPtr -= FunctionRecSize;
|
||||||
AlignedFuncStorage AlignedFuncRecordBuffer;
|
FunctionRecord FuncRecord;
|
||||||
const auto &FuncRecord = *reinterpret_cast<FunctionRecord *>(
|
std::memcpy(&FuncRecord, TLD.RecordPtr, FunctionRecSize);
|
||||||
std::memcpy(&AlignedFuncRecordBuffer, TLD.RecordPtr, FunctionRecSize));
|
|
||||||
assert(FuncRecord.RecordKind ==
|
assert(FuncRecord.RecordKind ==
|
||||||
uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
|
uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
|
||||||
"Expected to find function entry recording when rewinding.");
|
"Expected to find function entry recording when rewinding.");
|
||||||
|
@ -485,20 +481,17 @@ static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
|
||||||
auto RewindingTSC = LastTSC;
|
auto RewindingTSC = LastTSC;
|
||||||
auto RewindingRecordPtr = TLD.RecordPtr - FunctionRecSize;
|
auto RewindingRecordPtr = TLD.RecordPtr - FunctionRecSize;
|
||||||
while (TLD.NumTailCalls > 0) {
|
while (TLD.NumTailCalls > 0) {
|
||||||
AlignedFuncStorage TailExitRecordBuffer;
|
|
||||||
// Rewind the TSC back over the TAIL EXIT record.
|
// Rewind the TSC back over the TAIL EXIT record.
|
||||||
const auto &ExpectedTailExit =
|
FunctionRecord ExpectedTailExit;
|
||||||
*reinterpret_cast<FunctionRecord *>(std::memcpy(
|
std::memcpy(&ExpectedTailExit, RewindingRecordPtr, FunctionRecSize);
|
||||||
&TailExitRecordBuffer, RewindingRecordPtr, FunctionRecSize));
|
|
||||||
|
|
||||||
assert(ExpectedTailExit.RecordKind ==
|
assert(ExpectedTailExit.RecordKind ==
|
||||||
uint8_t(FunctionRecord::RecordKinds::FunctionTailExit) &&
|
uint8_t(FunctionRecord::RecordKinds::FunctionTailExit) &&
|
||||||
"Expected to find tail exit when rewinding.");
|
"Expected to find tail exit when rewinding.");
|
||||||
RewindingRecordPtr -= FunctionRecSize;
|
RewindingRecordPtr -= FunctionRecSize;
|
||||||
RewindingTSC -= ExpectedTailExit.TSCDelta;
|
RewindingTSC -= ExpectedTailExit.TSCDelta;
|
||||||
AlignedFuncStorage FunctionEntryBuffer;
|
FunctionRecord ExpectedFunctionEntry;
|
||||||
const auto &ExpectedFunctionEntry = *reinterpret_cast<FunctionRecord *>(
|
std::memcpy(&ExpectedFunctionEntry, RewindingRecordPtr, FunctionRecSize);
|
||||||
std::memcpy(&FunctionEntryBuffer, RewindingRecordPtr, FunctionRecSize));
|
|
||||||
assert(ExpectedFunctionEntry.RecordKind ==
|
assert(ExpectedFunctionEntry.RecordKind ==
|
||||||
uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
|
uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
|
||||||
"Expected to find function entry when rewinding tail call.");
|
"Expected to find function entry when rewinding tail call.");
|
||||||
|
|
Loading…
Reference in New Issue