[XRay][compiler-rt] Remove more STL dependenices from FDR mode

Summary:
This change removes dependencies on STL types:

  - std::aligned_storage -- we're using manually-aligned character
    buffers instead for metadata and function records.

  - std::tuple -- use a plain old struct instead.

This is an incremental step in removing all STL references from the
compiler-rt implementation of XRay (llvm.org/PR32274).

Reviewers: dblaikie, pelikan, kpw

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D39277

llvm-svn: 316816
This commit is contained in:
Dean Michael Berris 2017-10-27 23:59:41 +00:00
parent d0c6cf2e8c
commit e8fec1955a
2 changed files with 42 additions and 53 deletions

View File

@ -15,15 +15,10 @@
//
//===----------------------------------------------------------------------===//
#include "xray_fdr_logging.h"
#include <algorithm>
#include <bitset>
#include <cerrno>
#include <cstring>
#include <sys/syscall.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
#include <unordered_map>
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h"
@ -176,19 +171,22 @@ XRayLogInitStatus fdrLoggingReset() XRAY_NEVER_INSTRUMENT {
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
}
static std::tuple<uint64_t, unsigned char>
getTimestamp() XRAY_NEVER_INSTRUMENT {
struct TSCAndCPU {
uint64_t TSC;
unsigned char CPU;
};
static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT {
// We want to get the TSC as early as possible, so that we can check whether
// we've seen this CPU before. We also do it before we load anything else, to
// allow for forward progress with the scheduling.
unsigned char CPU;
uint64_t TSC;
TSCAndCPU Result;
// Test once for required CPU features
static bool TSCSupported = probeRequiredCPUFeatures();
if (TSCSupported) {
TSC = __xray::readTSC(CPU);
Result.TSC = __xray::readTSC(Result.CPU);
} else {
// FIXME: This code needs refactoring as it appears in multiple locations
timespec TS;
@ -197,34 +195,32 @@ getTimestamp() XRAY_NEVER_INSTRUMENT {
Report("clock_gettime(2) return %d, errno=%d", result, int(errno));
TS = {0, 0};
}
CPU = 0;
TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
Result.CPU = 0;
Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
}
return std::make_tuple(TSC, CPU);
return Result;
}
void fdrLoggingHandleArg0(int32_t FuncId,
XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
auto TSC_CPU = getTimestamp();
__xray_fdr_internal::processFunctionHook(FuncId, Entry, std::get<0>(TSC_CPU),
std::get<1>(TSC_CPU), 0,
clock_gettime, *BQ);
auto TC = getTimestamp();
__xray_fdr_internal::processFunctionHook(FuncId, Entry, TC.TSC,
TC.CPU, 0, clock_gettime, *BQ);
}
void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry,
uint64_t Arg) XRAY_NEVER_INSTRUMENT {
auto TSC_CPU = getTimestamp();
__xray_fdr_internal::processFunctionHook(FuncId, Entry, std::get<0>(TSC_CPU),
std::get<1>(TSC_CPU), Arg,
clock_gettime, *BQ);
auto TC = getTimestamp();
__xray_fdr_internal::processFunctionHook(
FuncId, Entry, TC.TSC, TC.CPU, Arg, clock_gettime, *BQ);
}
void fdrLoggingHandleCustomEvent(void *Event,
std::size_t EventSize) XRAY_NEVER_INSTRUMENT {
using namespace __xray_fdr_internal;
auto TSC_CPU = getTimestamp();
auto &TSC = std::get<0>(TSC_CPU);
auto &CPU = std::get<1>(TSC_CPU);
auto TC = getTimestamp();
auto &TSC = TC.TSC;
auto &CPU = TC.CPU;
RecursionGuard Guard{Running};
if (!Guard) {
assert(Running && "RecursionGuard is buggy!");
@ -261,7 +257,7 @@ void fdrLoggingHandleCustomEvent(void *Event,
CustomEvent.Type = uint8_t(RecordType::Metadata);
CustomEvent.RecordKind =
uint8_t(MetadataRecord::RecordKinds::CustomEventMarker);
constexpr auto TSCSize = sizeof(std::get<0>(TSC_CPU));
constexpr auto TSCSize = sizeof(TC.TSC);
std::memcpy(&CustomEvent.Data, &ReducedEventSize, sizeof(int32_t));
std::memcpy(&CustomEvent.Data[sizeof(int32_t)], &TSC, TSCSize);
std::memcpy(TLD.RecordPtr, &CustomEvent, sizeof(CustomEvent));

View File

@ -18,16 +18,17 @@
#define XRAY_XRAY_FDR_LOGGING_IMPL_H
#include <cassert>
#include <cstdint>
#include <cstddef>
#include <cstring>
#include <limits>
#include <memory>
#include <pthread.h>
#include <string>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>
// FIXME: Implement analogues to std::shared_ptr and std::weak_ptr
#include <memory>
#include "sanitizer_common/sanitizer_common.h"
#include "xray/xray_log_interface.h"
#include "xray_buffer_queue.h"
@ -96,7 +97,7 @@ static void writeTSCWrapMetadata(uint64_t TSC);
// call so that it can be initialized on first use instead of as a global. We
// force the alignment to 64-bytes for x86 cache line alignment, as this
// structure is used in the hot path of implementation.
struct ALIGNED(64) ThreadLocalData {
struct alignas(64) ThreadLocalData {
BufferQueue::Buffer Buffer;
char *RecordPtr = nullptr;
// The number of FunctionEntry records immediately preceding RecordPtr.
@ -176,8 +177,8 @@ static ThreadLocalData &getThreadLocalData() {
// We need aligned, uninitialized storage for the TLS object which is
// trivially destructible. We're going to use this as raw storage and
// placement-new the ThreadLocalData object into it later.
thread_local std::aligned_storage<sizeof(ThreadLocalData),
alignof(ThreadLocalData)>::type TLSBuffer;
alignas(alignof(ThreadLocalData)) thread_local unsigned char
TLSBuffer[sizeof(ThreadLocalData)];
// Ensure that we only actually ever do the pthread initialization once.
thread_local bool UNUSED Unused = [] {
@ -215,7 +216,7 @@ static ThreadLocalData &getThreadLocalData() {
return true;
}();
return *reinterpret_cast<ThreadLocalData *>(&TLSBuffer);
return *reinterpret_cast<ThreadLocalData *>(TLSBuffer);
}
//-----------------------------------------------------------------------------|
@ -255,14 +256,15 @@ public:
inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
static constexpr int InitRecordsCount = 2;
std::aligned_storage<sizeof(MetadataRecord)>::type Records[InitRecordsCount];
alignas(alignof(MetadataRecord)) unsigned char
Records[InitRecordsCount * MetadataRecSize];
{
// Write out a MetadataRecord to signify that this is the start of a new
// buffer, associated with a particular thread, with a new CPU. For the
// data, we have 15 bytes to squeeze as much information as we can. At this
// point we only write down the following bytes:
// - Thread ID (pid_t, 4 bytes)
auto &NewBuffer = *reinterpret_cast<MetadataRecord *>(&Records[0]);
auto &NewBuffer = *reinterpret_cast<MetadataRecord *>(Records);
NewBuffer.Type = uint8_t(RecordType::Metadata);
NewBuffer.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewBuffer);
std::memcpy(&NewBuffer.Data, &Tid, sizeof(pid_t));
@ -270,7 +272,8 @@ inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
// Also write the WalltimeMarker record.
{
static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes");
auto &WalltimeMarker = *reinterpret_cast<MetadataRecord *>(&Records[1]);
auto &WalltimeMarker =
*reinterpret_cast<MetadataRecord *>(Records + MetadataRecSize);
WalltimeMarker.Type = uint8_t(RecordType::Metadata);
WalltimeMarker.RecordKind =
uint8_t(MetadataRecord::RecordKinds::WalltimeMarker);
@ -382,10 +385,7 @@ static inline void writeCallArgumentMetadata(uint64_t A) XRAY_NEVER_INSTRUMENT {
static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
XRayEntryType EntryType,
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
std::aligned_storage<sizeof(FunctionRecord), alignof(FunctionRecord)>::type
AlignedFuncRecordBuffer;
auto &FuncRecord =
*reinterpret_cast<FunctionRecord *>(&AlignedFuncRecordBuffer);
FunctionRecord FuncRecord;
FuncRecord.Type = uint8_t(RecordType::Function);
// Only take 28 bits of the function id.
FuncRecord.FuncId = FuncId & ~(0x0F << 28);
@ -439,7 +439,7 @@ static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
}
}
std::memcpy(MemPtr, &AlignedFuncRecordBuffer, sizeof(FunctionRecord));
std::memcpy(MemPtr, &FuncRecord, sizeof(FunctionRecord));
MemPtr += sizeof(FunctionRecord);
}
@ -456,14 +456,10 @@ static uint64_t thresholdTicks() {
// "Function Entry" record and any "Tail Call Exit" records after that.
static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
uint64_t &LastFunctionEntryTSC, int32_t FuncId) {
using AlignedFuncStorage =
std::aligned_storage<sizeof(FunctionRecord),
alignof(FunctionRecord)>::type;
auto &TLD = getThreadLocalData();
TLD.RecordPtr -= FunctionRecSize;
AlignedFuncStorage AlignedFuncRecordBuffer;
const auto &FuncRecord = *reinterpret_cast<FunctionRecord *>(
std::memcpy(&AlignedFuncRecordBuffer, TLD.RecordPtr, FunctionRecSize));
FunctionRecord FuncRecord;
std::memcpy(&FuncRecord, TLD.RecordPtr, FunctionRecSize);
assert(FuncRecord.RecordKind ==
uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
"Expected to find function entry recording when rewinding.");
@ -485,20 +481,17 @@ static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
auto RewindingTSC = LastTSC;
auto RewindingRecordPtr = TLD.RecordPtr - FunctionRecSize;
while (TLD.NumTailCalls > 0) {
AlignedFuncStorage TailExitRecordBuffer;
// Rewind the TSC back over the TAIL EXIT record.
const auto &ExpectedTailExit =
*reinterpret_cast<FunctionRecord *>(std::memcpy(
&TailExitRecordBuffer, RewindingRecordPtr, FunctionRecSize));
FunctionRecord ExpectedTailExit;
std::memcpy(&ExpectedTailExit, RewindingRecordPtr, FunctionRecSize);
assert(ExpectedTailExit.RecordKind ==
uint8_t(FunctionRecord::RecordKinds::FunctionTailExit) &&
"Expected to find tail exit when rewinding.");
RewindingRecordPtr -= FunctionRecSize;
RewindingTSC -= ExpectedTailExit.TSCDelta;
AlignedFuncStorage FunctionEntryBuffer;
const auto &ExpectedFunctionEntry = *reinterpret_cast<FunctionRecord *>(
std::memcpy(&FunctionEntryBuffer, RewindingRecordPtr, FunctionRecSize));
FunctionRecord ExpectedFunctionEntry;
std::memcpy(&ExpectedFunctionEntry, RewindingRecordPtr, FunctionRecSize);
assert(ExpectedFunctionEntry.RecordKind ==
uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
"Expected to find function entry when rewinding tail call.");