[XRay][compiler-rt] Remove more STL dependencies from FDR mode

Summary:
This change removes dependencies on STL types:

  - std::aligned_storage -- we're using manually-aligned character
    buffers instead for metadata and function records.

  - std::tuple -- use a plain old struct instead.

This is an incremental step in removing all STL references from the
compiler-rt implementation of XRay (llvm.org/PR32274).

Reviewers: dblaikie, pelikan, kpw

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D39277

llvm-svn: 316816
This commit is contained in:
Dean Michael Berris 2017-10-27 23:59:41 +00:00
parent d0c6cf2e8c
commit e8fec1955a
2 changed files with 42 additions and 53 deletions

View File

@ -15,15 +15,10 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "xray_fdr_logging.h" #include "xray_fdr_logging.h"
#include <algorithm>
#include <bitset>
#include <cerrno>
#include <cstring>
#include <sys/syscall.h> #include <sys/syscall.h>
#include <sys/time.h> #include <sys/time.h>
#include <time.h> #include <time.h>
#include <unistd.h> #include <unistd.h>
#include <unordered_map>
#include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_common.h"
@ -176,19 +171,22 @@ XRayLogInitStatus fdrLoggingReset() XRAY_NEVER_INSTRUMENT {
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
} }
static std::tuple<uint64_t, unsigned char> struct TSCAndCPU {
getTimestamp() XRAY_NEVER_INSTRUMENT { uint64_t TSC;
unsigned char CPU;
};
static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT {
// We want to get the TSC as early as possible, so that we can check whether // We want to get the TSC as early as possible, so that we can check whether
// we've seen this CPU before. We also do it before we load anything else, to // we've seen this CPU before. We also do it before we load anything else, to
// allow for forward progress with the scheduling. // allow for forward progress with the scheduling.
unsigned char CPU; TSCAndCPU Result;
uint64_t TSC;
// Test once for required CPU features // Test once for required CPU features
static bool TSCSupported = probeRequiredCPUFeatures(); static bool TSCSupported = probeRequiredCPUFeatures();
if (TSCSupported) { if (TSCSupported) {
TSC = __xray::readTSC(CPU); Result.TSC = __xray::readTSC(Result.CPU);
} else { } else {
// FIXME: This code needs refactoring as it appears in multiple locations // FIXME: This code needs refactoring as it appears in multiple locations
timespec TS; timespec TS;
@ -197,34 +195,32 @@ getTimestamp() XRAY_NEVER_INSTRUMENT {
Report("clock_gettime(2) return %d, errno=%d", result, int(errno)); Report("clock_gettime(2) return %d, errno=%d", result, int(errno));
TS = {0, 0}; TS = {0, 0};
} }
CPU = 0; Result.CPU = 0;
TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec; Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
} }
return std::make_tuple(TSC, CPU); return Result;
} }
void fdrLoggingHandleArg0(int32_t FuncId, void fdrLoggingHandleArg0(int32_t FuncId,
XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
auto TSC_CPU = getTimestamp(); auto TC = getTimestamp();
__xray_fdr_internal::processFunctionHook(FuncId, Entry, std::get<0>(TSC_CPU), __xray_fdr_internal::processFunctionHook(FuncId, Entry, TC.TSC,
std::get<1>(TSC_CPU), 0, TC.CPU, 0, clock_gettime, *BQ);
clock_gettime, *BQ);
} }
void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry,
uint64_t Arg) XRAY_NEVER_INSTRUMENT { uint64_t Arg) XRAY_NEVER_INSTRUMENT {
auto TSC_CPU = getTimestamp(); auto TC = getTimestamp();
__xray_fdr_internal::processFunctionHook(FuncId, Entry, std::get<0>(TSC_CPU), __xray_fdr_internal::processFunctionHook(
std::get<1>(TSC_CPU), Arg, FuncId, Entry, TC.TSC, TC.CPU, Arg, clock_gettime, *BQ);
clock_gettime, *BQ);
} }
void fdrLoggingHandleCustomEvent(void *Event, void fdrLoggingHandleCustomEvent(void *Event,
std::size_t EventSize) XRAY_NEVER_INSTRUMENT { std::size_t EventSize) XRAY_NEVER_INSTRUMENT {
using namespace __xray_fdr_internal; using namespace __xray_fdr_internal;
auto TSC_CPU = getTimestamp(); auto TC = getTimestamp();
auto &TSC = std::get<0>(TSC_CPU); auto &TSC = TC.TSC;
auto &CPU = std::get<1>(TSC_CPU); auto &CPU = TC.CPU;
RecursionGuard Guard{Running}; RecursionGuard Guard{Running};
if (!Guard) { if (!Guard) {
assert(Running && "RecursionGuard is buggy!"); assert(Running && "RecursionGuard is buggy!");
@ -261,7 +257,7 @@ void fdrLoggingHandleCustomEvent(void *Event,
CustomEvent.Type = uint8_t(RecordType::Metadata); CustomEvent.Type = uint8_t(RecordType::Metadata);
CustomEvent.RecordKind = CustomEvent.RecordKind =
uint8_t(MetadataRecord::RecordKinds::CustomEventMarker); uint8_t(MetadataRecord::RecordKinds::CustomEventMarker);
constexpr auto TSCSize = sizeof(std::get<0>(TSC_CPU)); constexpr auto TSCSize = sizeof(TC.TSC);
std::memcpy(&CustomEvent.Data, &ReducedEventSize, sizeof(int32_t)); std::memcpy(&CustomEvent.Data, &ReducedEventSize, sizeof(int32_t));
std::memcpy(&CustomEvent.Data[sizeof(int32_t)], &TSC, TSCSize); std::memcpy(&CustomEvent.Data[sizeof(int32_t)], &TSC, TSCSize);
std::memcpy(TLD.RecordPtr, &CustomEvent, sizeof(CustomEvent)); std::memcpy(TLD.RecordPtr, &CustomEvent, sizeof(CustomEvent));

View File

@ -18,16 +18,17 @@
#define XRAY_XRAY_FDR_LOGGING_IMPL_H #define XRAY_XRAY_FDR_LOGGING_IMPL_H
#include <cassert> #include <cassert>
#include <cstdint> #include <cstddef>
#include <cstring> #include <cstring>
#include <limits> #include <limits>
#include <memory>
#include <pthread.h> #include <pthread.h>
#include <string>
#include <sys/syscall.h> #include <sys/syscall.h>
#include <time.h> #include <time.h>
#include <unistd.h> #include <unistd.h>
// FIXME: Implement analogues to std::shared_ptr and std::weak_ptr
#include <memory>
#include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_common.h"
#include "xray/xray_log_interface.h" #include "xray/xray_log_interface.h"
#include "xray_buffer_queue.h" #include "xray_buffer_queue.h"
@ -96,7 +97,7 @@ static void writeTSCWrapMetadata(uint64_t TSC);
// call so that it can be initialized on first use instead of as a global. We // call so that it can be initialized on first use instead of as a global. We
// force the alignment to 64-bytes for x86 cache line alignment, as this // force the alignment to 64-bytes for x86 cache line alignment, as this
// structure is used in the hot path of implementation. // structure is used in the hot path of implementation.
struct ALIGNED(64) ThreadLocalData { struct alignas(64) ThreadLocalData {
BufferQueue::Buffer Buffer; BufferQueue::Buffer Buffer;
char *RecordPtr = nullptr; char *RecordPtr = nullptr;
// The number of FunctionEntry records immediately preceding RecordPtr. // The number of FunctionEntry records immediately preceding RecordPtr.
@ -176,8 +177,8 @@ static ThreadLocalData &getThreadLocalData() {
// We need aligned, uninitialized storage for the TLS object which is // We need aligned, uninitialized storage for the TLS object which is
// trivially destructible. We're going to use this as raw storage and // trivially destructible. We're going to use this as raw storage and
// placement-new the ThreadLocalData object into it later. // placement-new the ThreadLocalData object into it later.
thread_local std::aligned_storage<sizeof(ThreadLocalData), alignas(alignof(ThreadLocalData)) thread_local unsigned char
alignof(ThreadLocalData)>::type TLSBuffer; TLSBuffer[sizeof(ThreadLocalData)];
// Ensure that we only actually ever do the pthread initialization once. // Ensure that we only actually ever do the pthread initialization once.
thread_local bool UNUSED Unused = [] { thread_local bool UNUSED Unused = [] {
@ -215,7 +216,7 @@ static ThreadLocalData &getThreadLocalData() {
return true; return true;
}(); }();
return *reinterpret_cast<ThreadLocalData *>(&TLSBuffer); return *reinterpret_cast<ThreadLocalData *>(TLSBuffer);
} }
//-----------------------------------------------------------------------------| //-----------------------------------------------------------------------------|
@ -255,14 +256,15 @@ public:
inline void writeNewBufferPreamble(pid_t Tid, timespec TS, inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
char *&MemPtr) XRAY_NEVER_INSTRUMENT { char *&MemPtr) XRAY_NEVER_INSTRUMENT {
static constexpr int InitRecordsCount = 2; static constexpr int InitRecordsCount = 2;
std::aligned_storage<sizeof(MetadataRecord)>::type Records[InitRecordsCount]; alignas(alignof(MetadataRecord)) unsigned char
Records[InitRecordsCount * MetadataRecSize];
{ {
// Write out a MetadataRecord to signify that this is the start of a new // Write out a MetadataRecord to signify that this is the start of a new
// buffer, associated with a particular thread, with a new CPU. For the // buffer, associated with a particular thread, with a new CPU. For the
// data, we have 15 bytes to squeeze as much information as we can. At this // data, we have 15 bytes to squeeze as much information as we can. At this
// point we only write down the following bytes: // point we only write down the following bytes:
// - Thread ID (pid_t, 4 bytes) // - Thread ID (pid_t, 4 bytes)
auto &NewBuffer = *reinterpret_cast<MetadataRecord *>(&Records[0]); auto &NewBuffer = *reinterpret_cast<MetadataRecord *>(Records);
NewBuffer.Type = uint8_t(RecordType::Metadata); NewBuffer.Type = uint8_t(RecordType::Metadata);
NewBuffer.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewBuffer); NewBuffer.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewBuffer);
std::memcpy(&NewBuffer.Data, &Tid, sizeof(pid_t)); std::memcpy(&NewBuffer.Data, &Tid, sizeof(pid_t));
@ -270,7 +272,8 @@ inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
// Also write the WalltimeMarker record. // Also write the WalltimeMarker record.
{ {
static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes"); static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes");
auto &WalltimeMarker = *reinterpret_cast<MetadataRecord *>(&Records[1]); auto &WalltimeMarker =
*reinterpret_cast<MetadataRecord *>(Records + MetadataRecSize);
WalltimeMarker.Type = uint8_t(RecordType::Metadata); WalltimeMarker.Type = uint8_t(RecordType::Metadata);
WalltimeMarker.RecordKind = WalltimeMarker.RecordKind =
uint8_t(MetadataRecord::RecordKinds::WalltimeMarker); uint8_t(MetadataRecord::RecordKinds::WalltimeMarker);
@ -382,10 +385,7 @@ static inline void writeCallArgumentMetadata(uint64_t A) XRAY_NEVER_INSTRUMENT {
static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta, static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
XRayEntryType EntryType, XRayEntryType EntryType,
char *&MemPtr) XRAY_NEVER_INSTRUMENT { char *&MemPtr) XRAY_NEVER_INSTRUMENT {
std::aligned_storage<sizeof(FunctionRecord), alignof(FunctionRecord)>::type FunctionRecord FuncRecord;
AlignedFuncRecordBuffer;
auto &FuncRecord =
*reinterpret_cast<FunctionRecord *>(&AlignedFuncRecordBuffer);
FuncRecord.Type = uint8_t(RecordType::Function); FuncRecord.Type = uint8_t(RecordType::Function);
// Only take 28 bits of the function id. // Only take 28 bits of the function id.
FuncRecord.FuncId = FuncId & ~(0x0F << 28); FuncRecord.FuncId = FuncId & ~(0x0F << 28);
@ -439,7 +439,7 @@ static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
} }
} }
std::memcpy(MemPtr, &AlignedFuncRecordBuffer, sizeof(FunctionRecord)); std::memcpy(MemPtr, &FuncRecord, sizeof(FunctionRecord));
MemPtr += sizeof(FunctionRecord); MemPtr += sizeof(FunctionRecord);
} }
@ -456,14 +456,10 @@ static uint64_t thresholdTicks() {
// "Function Entry" record and any "Tail Call Exit" records after that. // "Function Entry" record and any "Tail Call Exit" records after that.
static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC, static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
uint64_t &LastFunctionEntryTSC, int32_t FuncId) { uint64_t &LastFunctionEntryTSC, int32_t FuncId) {
using AlignedFuncStorage =
std::aligned_storage<sizeof(FunctionRecord),
alignof(FunctionRecord)>::type;
auto &TLD = getThreadLocalData(); auto &TLD = getThreadLocalData();
TLD.RecordPtr -= FunctionRecSize; TLD.RecordPtr -= FunctionRecSize;
AlignedFuncStorage AlignedFuncRecordBuffer; FunctionRecord FuncRecord;
const auto &FuncRecord = *reinterpret_cast<FunctionRecord *>( std::memcpy(&FuncRecord, TLD.RecordPtr, FunctionRecSize);
std::memcpy(&AlignedFuncRecordBuffer, TLD.RecordPtr, FunctionRecSize));
assert(FuncRecord.RecordKind == assert(FuncRecord.RecordKind ==
uint8_t(FunctionRecord::RecordKinds::FunctionEnter) && uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
"Expected to find function entry recording when rewinding."); "Expected to find function entry recording when rewinding.");
@ -485,20 +481,17 @@ static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
auto RewindingTSC = LastTSC; auto RewindingTSC = LastTSC;
auto RewindingRecordPtr = TLD.RecordPtr - FunctionRecSize; auto RewindingRecordPtr = TLD.RecordPtr - FunctionRecSize;
while (TLD.NumTailCalls > 0) { while (TLD.NumTailCalls > 0) {
AlignedFuncStorage TailExitRecordBuffer;
// Rewind the TSC back over the TAIL EXIT record. // Rewind the TSC back over the TAIL EXIT record.
const auto &ExpectedTailExit = FunctionRecord ExpectedTailExit;
*reinterpret_cast<FunctionRecord *>(std::memcpy( std::memcpy(&ExpectedTailExit, RewindingRecordPtr, FunctionRecSize);
&TailExitRecordBuffer, RewindingRecordPtr, FunctionRecSize));
assert(ExpectedTailExit.RecordKind == assert(ExpectedTailExit.RecordKind ==
uint8_t(FunctionRecord::RecordKinds::FunctionTailExit) && uint8_t(FunctionRecord::RecordKinds::FunctionTailExit) &&
"Expected to find tail exit when rewinding."); "Expected to find tail exit when rewinding.");
RewindingRecordPtr -= FunctionRecSize; RewindingRecordPtr -= FunctionRecSize;
RewindingTSC -= ExpectedTailExit.TSCDelta; RewindingTSC -= ExpectedTailExit.TSCDelta;
AlignedFuncStorage FunctionEntryBuffer; FunctionRecord ExpectedFunctionEntry;
const auto &ExpectedFunctionEntry = *reinterpret_cast<FunctionRecord *>( std::memcpy(&ExpectedFunctionEntry, RewindingRecordPtr, FunctionRecSize);
std::memcpy(&FunctionEntryBuffer, RewindingRecordPtr, FunctionRecSize));
assert(ExpectedFunctionEntry.RecordKind == assert(ExpectedFunctionEntry.RecordKind ==
uint8_t(FunctionRecord::RecordKinds::FunctionEnter) && uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
"Expected to find function entry when rewinding tail call."); "Expected to find function entry when rewinding tail call.");