forked from OSchip/llvm-project
[XRay][compiler-rt][NFC] Refactor global TLS variables behind an accessor function.
Summary: This change hides all the initialization of thread_local variables used by the XRay FDR mode implementation behind a function call. This makes the initialization of thread-local data happen lazily, on first use, instead of eagerly as it does when the variables are globals. It also gives us an isolation mechanism if/when we want to change the TLS implementation from using the C++ thread_local keyword to something more ad hoc (potentially using pthread directly) on some platforms or set-ups where we cannot use C++ thread_local variables. Reviewers: kpw, eizan Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D37248 llvm-svn: 311997
This commit is contained in:
parent
9203afcf0d
commit
eca980396e
|
@ -223,7 +223,8 @@ void fdrLoggingHandleCustomEvent(void *Event,
|
||||||
(void)Once;
|
(void)Once;
|
||||||
}
|
}
|
||||||
int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
|
int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
|
||||||
if (!isLogInitializedAndReady(*LocalBQ, TSC, CPU, clock_gettime))
|
auto &TLD = getThreadLocalData();
|
||||||
|
if (!isLogInitializedAndReady(TLD.LocalBQ, TSC, CPU, clock_gettime))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Here we need to prepare the log to handle:
|
// Here we need to prepare the log to handle:
|
||||||
|
@ -231,7 +232,7 @@ void fdrLoggingHandleCustomEvent(void *Event,
|
||||||
// - The additional data we're going to write. Currently, that's the size of
|
// - The additional data we're going to write. Currently, that's the size of
|
||||||
// the event we're going to dump into the log as free-form bytes.
|
// the event we're going to dump into the log as free-form bytes.
|
||||||
if (!prepareBuffer(clock_gettime, MetadataRecSize + EventSize)) {
|
if (!prepareBuffer(clock_gettime, MetadataRecSize + EventSize)) {
|
||||||
LocalBQ = nullptr;
|
TLD.LocalBQ = nullptr;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -246,9 +247,9 @@ void fdrLoggingHandleCustomEvent(void *Event,
|
||||||
constexpr auto TSCSize = sizeof(std::get<0>(TSC_CPU));
|
constexpr auto TSCSize = sizeof(std::get<0>(TSC_CPU));
|
||||||
std::memcpy(&CustomEvent.Data, &ReducedEventSize, sizeof(int32_t));
|
std::memcpy(&CustomEvent.Data, &ReducedEventSize, sizeof(int32_t));
|
||||||
std::memcpy(&CustomEvent.Data[sizeof(int32_t)], &TSC, TSCSize);
|
std::memcpy(&CustomEvent.Data[sizeof(int32_t)], &TSC, TSCSize);
|
||||||
std::memcpy(RecordPtr, &CustomEvent, sizeof(CustomEvent));
|
std::memcpy(TLD.RecordPtr, &CustomEvent, sizeof(CustomEvent));
|
||||||
RecordPtr += sizeof(CustomEvent);
|
TLD.RecordPtr += sizeof(CustomEvent);
|
||||||
std::memcpy(RecordPtr, Event, ReducedEventSize);
|
std::memcpy(TLD.RecordPtr, Event, ReducedEventSize);
|
||||||
endBufferIfFull();
|
endBufferIfFull();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -104,36 +104,38 @@ static void processFunctionHook(int32_t FuncId, XRayEntryType Entry,
|
||||||
__sanitizer::atomic_sint32_t &LoggingStatus,
|
__sanitizer::atomic_sint32_t &LoggingStatus,
|
||||||
const std::shared_ptr<BufferQueue> &BQ);
|
const std::shared_ptr<BufferQueue> &BQ);
|
||||||
|
|
||||||
//-----------------------------------------------------------------------------|
|
// Group together thread-local-data in a struct, then hide it behind a function
|
||||||
// The rest of the file is implementation. |
|
// call so that it can be initialized on first use instead of as a global.
|
||||||
//-----------------------------------------------------------------------------|
|
struct ThreadLocalData {
|
||||||
// Functions are implemented in the header for inlining since we don't want |
|
BufferQueue::Buffer Buffer;
|
||||||
// to grow the stack when we've hijacked the binary for logging. |
|
char *RecordPtr = nullptr;
|
||||||
//-----------------------------------------------------------------------------|
|
// The number of FunctionEntry records immediately preceding RecordPtr.
|
||||||
|
uint8_t NumConsecutiveFnEnters = 0;
|
||||||
|
|
||||||
namespace {
|
// The number of adjacent, consecutive pairs of FunctionEntry, Tail Exit
|
||||||
|
// records preceding RecordPtr.
|
||||||
|
uint8_t NumTailCalls = 0;
|
||||||
|
|
||||||
thread_local BufferQueue::Buffer Buffer;
|
// We use a thread_local variable to keep track of which CPUs we've already
|
||||||
thread_local char *RecordPtr = nullptr;
|
// run, and the TSC times for these CPUs. This allows us to stop repeating the
|
||||||
|
// CPU field in the function records.
|
||||||
|
//
|
||||||
|
// We assume that we'll support only 65536 CPUs for x86_64.
|
||||||
|
uint16_t CurrentCPU = std::numeric_limits<uint16_t>::max();
|
||||||
|
uint64_t LastTSC = 0;
|
||||||
|
uint64_t LastFunctionEntryTSC = 0;
|
||||||
|
|
||||||
// The number of FunctionEntry records immediately preceding RecordPtr.
|
// Make sure a thread that's ever called handleArg0 has a thread-local
|
||||||
thread_local uint8_t NumConsecutiveFnEnters = 0;
|
// live reference to the buffer queue for this particular instance of
|
||||||
|
// FDRLogging, and that we're going to clean it up when the thread exits.
|
||||||
|
std::shared_ptr<BufferQueue> LocalBQ = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
// The number of adjacent, consecutive pairs of FunctionEntry, Tail Exit
|
// Forward-declare, defined later.
|
||||||
// records preceding RecordPtr.
|
static ThreadLocalData &getThreadLocalData();
|
||||||
thread_local uint8_t NumTailCalls = 0;
|
|
||||||
|
|
||||||
constexpr auto MetadataRecSize = sizeof(MetadataRecord);
|
static constexpr auto MetadataRecSize = sizeof(MetadataRecord);
|
||||||
constexpr auto FunctionRecSize = sizeof(FunctionRecord);
|
static constexpr auto FunctionRecSize = sizeof(FunctionRecord);
|
||||||
|
|
||||||
// We use a thread_local variable to keep track of which CPUs we've already
|
|
||||||
// run, and the TSC times for these CPUs. This allows us to stop repeating the
|
|
||||||
// CPU field in the function records.
|
|
||||||
//
|
|
||||||
// We assume that we'll support only 65536 CPUs for x86_64.
|
|
||||||
thread_local uint16_t CurrentCPU = std::numeric_limits<uint16_t>::max();
|
|
||||||
thread_local uint64_t LastTSC = 0;
|
|
||||||
thread_local uint64_t LastFunctionEntryTSC = 0;
|
|
||||||
|
|
||||||
class ThreadExitBufferCleanup {
|
class ThreadExitBufferCleanup {
|
||||||
std::shared_ptr<BufferQueue> &Buffers;
|
std::shared_ptr<BufferQueue> &Buffers;
|
||||||
|
@ -146,6 +148,8 @@ public:
|
||||||
Buffer(Buffer) {}
|
Buffer(Buffer) {}
|
||||||
|
|
||||||
~ThreadExitBufferCleanup() noexcept XRAY_NEVER_INSTRUMENT {
|
~ThreadExitBufferCleanup() noexcept XRAY_NEVER_INSTRUMENT {
|
||||||
|
auto &TLD = getThreadLocalData();
|
||||||
|
auto &RecordPtr = TLD.RecordPtr;
|
||||||
if (RecordPtr == nullptr)
|
if (RecordPtr == nullptr)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
@ -166,19 +170,27 @@ public:
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Make sure a thread that's ever called handleArg0 has a thread-local
|
static ThreadLocalData &getThreadLocalData() {
|
||||||
// live reference to the buffer queue for this particular instance of
|
thread_local ThreadLocalData TLD;
|
||||||
// FDRLogging, and that we're going to clean it up when the thread exits.
|
thread_local ThreadExitBufferCleanup Cleanup(TLD.LocalBQ, TLD.Buffer);
|
||||||
thread_local std::shared_ptr<BufferQueue>* LocalBQ =
|
return TLD;
|
||||||
new std::shared_ptr<BufferQueue>();
|
}
|
||||||
thread_local ThreadExitBufferCleanup Cleanup(*LocalBQ, Buffer);
|
|
||||||
|
//-----------------------------------------------------------------------------|
|
||||||
|
// The rest of the file is implementation. |
|
||||||
|
//-----------------------------------------------------------------------------|
|
||||||
|
// Functions are implemented in the header for inlining since we don't want |
|
||||||
|
// to grow the stack when we've hijacked the binary for logging. |
|
||||||
|
//-----------------------------------------------------------------------------|
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
class RecursionGuard {
|
class RecursionGuard {
|
||||||
bool &Running;
|
volatile bool &Running;
|
||||||
const bool Valid;
|
const bool Valid;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit RecursionGuard(bool &R) : Running(R), Valid(!R) {
|
explicit RecursionGuard(volatile bool &R) : Running(R), Valid(!R) {
|
||||||
if (Valid)
|
if (Valid)
|
||||||
Running = true;
|
Running = true;
|
||||||
}
|
}
|
||||||
|
@ -238,24 +250,29 @@ inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
|
||||||
}
|
}
|
||||||
std::memcpy(MemPtr, Records, sizeof(MetadataRecord) * InitRecordsCount);
|
std::memcpy(MemPtr, Records, sizeof(MetadataRecord) * InitRecordsCount);
|
||||||
MemPtr += sizeof(MetadataRecord) * InitRecordsCount;
|
MemPtr += sizeof(MetadataRecord) * InitRecordsCount;
|
||||||
NumConsecutiveFnEnters = 0;
|
auto &TLD = getThreadLocalData();
|
||||||
NumTailCalls = 0;
|
TLD.NumConsecutiveFnEnters = 0;
|
||||||
|
TLD.NumTailCalls = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void setupNewBuffer(int (*wall_clock_reader)(
|
inline void setupNewBuffer(int (*wall_clock_reader)(
|
||||||
clockid_t, struct timespec *)) XRAY_NEVER_INSTRUMENT {
|
clockid_t, struct timespec *)) XRAY_NEVER_INSTRUMENT {
|
||||||
|
auto &TLD = getThreadLocalData();
|
||||||
|
auto &Buffer = TLD.Buffer;
|
||||||
|
auto &RecordPtr = TLD.RecordPtr;
|
||||||
RecordPtr = static_cast<char *>(Buffer.Buffer);
|
RecordPtr = static_cast<char *>(Buffer.Buffer);
|
||||||
pid_t Tid = syscall(SYS_gettid);
|
pid_t Tid = syscall(SYS_gettid);
|
||||||
timespec TS{0, 0};
|
timespec TS{0, 0};
|
||||||
// This is typically clock_gettime, but callers have injection ability.
|
// This is typically clock_gettime, but callers have injection ability.
|
||||||
wall_clock_reader(CLOCK_MONOTONIC, &TS);
|
wall_clock_reader(CLOCK_MONOTONIC, &TS);
|
||||||
writeNewBufferPreamble(Tid, TS, RecordPtr);
|
writeNewBufferPreamble(Tid, TS, RecordPtr);
|
||||||
NumConsecutiveFnEnters = 0;
|
TLD.NumConsecutiveFnEnters = 0;
|
||||||
NumTailCalls = 0;
|
TLD.NumTailCalls = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
|
inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
|
||||||
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
||||||
|
auto &TLD = getThreadLocalData();
|
||||||
MetadataRecord NewCPUId;
|
MetadataRecord NewCPUId;
|
||||||
NewCPUId.Type = uint8_t(RecordType::Metadata);
|
NewCPUId.Type = uint8_t(RecordType::Metadata);
|
||||||
NewCPUId.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewCPUId);
|
NewCPUId.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewCPUId);
|
||||||
|
@ -268,32 +285,34 @@ inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
|
||||||
std::memcpy(&NewCPUId.Data[sizeof(CPU)], &TSC, sizeof(TSC));
|
std::memcpy(&NewCPUId.Data[sizeof(CPU)], &TSC, sizeof(TSC));
|
||||||
std::memcpy(MemPtr, &NewCPUId, sizeof(MetadataRecord));
|
std::memcpy(MemPtr, &NewCPUId, sizeof(MetadataRecord));
|
||||||
MemPtr += sizeof(MetadataRecord);
|
MemPtr += sizeof(MetadataRecord);
|
||||||
NumConsecutiveFnEnters = 0;
|
TLD.NumConsecutiveFnEnters = 0;
|
||||||
NumTailCalls = 0;
|
TLD.NumTailCalls = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void writeNewCPUIdMetadata(uint16_t CPU,
|
inline void writeNewCPUIdMetadata(uint16_t CPU,
|
||||||
uint64_t TSC) XRAY_NEVER_INSTRUMENT {
|
uint64_t TSC) XRAY_NEVER_INSTRUMENT {
|
||||||
writeNewCPUIdMetadata(CPU, TSC, RecordPtr);
|
writeNewCPUIdMetadata(CPU, TSC, getThreadLocalData().RecordPtr);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
||||||
|
auto &TLD = getThreadLocalData();
|
||||||
MetadataRecord EOBMeta;
|
MetadataRecord EOBMeta;
|
||||||
EOBMeta.Type = uint8_t(RecordType::Metadata);
|
EOBMeta.Type = uint8_t(RecordType::Metadata);
|
||||||
EOBMeta.RecordKind = uint8_t(MetadataRecord::RecordKinds::EndOfBuffer);
|
EOBMeta.RecordKind = uint8_t(MetadataRecord::RecordKinds::EndOfBuffer);
|
||||||
// For now we don't write any bytes into the Data field.
|
// For now we don't write any bytes into the Data field.
|
||||||
std::memcpy(MemPtr, &EOBMeta, sizeof(MetadataRecord));
|
std::memcpy(MemPtr, &EOBMeta, sizeof(MetadataRecord));
|
||||||
MemPtr += sizeof(MetadataRecord);
|
MemPtr += sizeof(MetadataRecord);
|
||||||
NumConsecutiveFnEnters = 0;
|
TLD.NumConsecutiveFnEnters = 0;
|
||||||
NumTailCalls = 0;
|
TLD.NumTailCalls = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {
|
inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {
|
||||||
writeEOBMetadata(RecordPtr);
|
writeEOBMetadata(getThreadLocalData().RecordPtr);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void writeTSCWrapMetadata(uint64_t TSC,
|
inline void writeTSCWrapMetadata(uint64_t TSC,
|
||||||
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
||||||
|
auto &TLD = getThreadLocalData();
|
||||||
MetadataRecord TSCWrap;
|
MetadataRecord TSCWrap;
|
||||||
TSCWrap.Type = uint8_t(RecordType::Metadata);
|
TSCWrap.Type = uint8_t(RecordType::Metadata);
|
||||||
TSCWrap.RecordKind = uint8_t(MetadataRecord::RecordKinds::TSCWrap);
|
TSCWrap.RecordKind = uint8_t(MetadataRecord::RecordKinds::TSCWrap);
|
||||||
|
@ -304,12 +323,12 @@ inline void writeTSCWrapMetadata(uint64_t TSC,
|
||||||
std::memcpy(&TSCWrap.Data, &TSC, sizeof(TSC));
|
std::memcpy(&TSCWrap.Data, &TSC, sizeof(TSC));
|
||||||
std::memcpy(MemPtr, &TSCWrap, sizeof(MetadataRecord));
|
std::memcpy(MemPtr, &TSCWrap, sizeof(MetadataRecord));
|
||||||
MemPtr += sizeof(MetadataRecord);
|
MemPtr += sizeof(MetadataRecord);
|
||||||
NumConsecutiveFnEnters = 0;
|
TLD.NumConsecutiveFnEnters = 0;
|
||||||
NumTailCalls = 0;
|
TLD.NumTailCalls = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
|
inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
|
||||||
writeTSCWrapMetadata(TSC, RecordPtr);
|
writeTSCWrapMetadata(TSC, getThreadLocalData().RecordPtr);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
|
inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
|
||||||
|
@ -324,36 +343,37 @@ inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
|
||||||
FuncRecord.FuncId = FuncId & ~(0x0F << 28);
|
FuncRecord.FuncId = FuncId & ~(0x0F << 28);
|
||||||
FuncRecord.TSCDelta = TSCDelta;
|
FuncRecord.TSCDelta = TSCDelta;
|
||||||
|
|
||||||
|
auto &TLD = getThreadLocalData();
|
||||||
switch (EntryType) {
|
switch (EntryType) {
|
||||||
case XRayEntryType::ENTRY:
|
case XRayEntryType::ENTRY:
|
||||||
++NumConsecutiveFnEnters;
|
++TLD.NumConsecutiveFnEnters;
|
||||||
FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
|
FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
|
||||||
break;
|
break;
|
||||||
case XRayEntryType::LOG_ARGS_ENTRY:
|
case XRayEntryType::LOG_ARGS_ENTRY:
|
||||||
// We should not rewind functions with logged args.
|
// We should not rewind functions with logged args.
|
||||||
NumConsecutiveFnEnters = 0;
|
TLD.NumConsecutiveFnEnters = 0;
|
||||||
NumTailCalls = 0;
|
TLD.NumTailCalls = 0;
|
||||||
FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
|
FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
|
||||||
break;
|
break;
|
||||||
case XRayEntryType::EXIT:
|
case XRayEntryType::EXIT:
|
||||||
// If we've decided to log the function exit, we will never erase the log
|
// If we've decided to log the function exit, we will never erase the log
|
||||||
// before it.
|
// before it.
|
||||||
NumConsecutiveFnEnters = 0;
|
TLD.NumConsecutiveFnEnters = 0;
|
||||||
NumTailCalls = 0;
|
TLD.NumTailCalls = 0;
|
||||||
FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionExit);
|
FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionExit);
|
||||||
break;
|
break;
|
||||||
case XRayEntryType::TAIL:
|
case XRayEntryType::TAIL:
|
||||||
// If we just entered the function we're tail exiting from or erased every
|
// If we just entered the function we're tail exiting from or erased every
|
||||||
// invocation since then, this function entry tail pair is a candidate to
|
// invocation since then, this function entry tail pair is a candidate to
|
||||||
// be erased when the child function exits.
|
// be erased when the child function exits.
|
||||||
if (NumConsecutiveFnEnters > 0) {
|
if (TLD.NumConsecutiveFnEnters > 0) {
|
||||||
++NumTailCalls;
|
++TLD.NumTailCalls;
|
||||||
NumConsecutiveFnEnters = 0;
|
TLD.NumConsecutiveFnEnters = 0;
|
||||||
} else {
|
} else {
|
||||||
// We will never be able to erase this tail call since we have logged
|
// We will never be able to erase this tail call since we have logged
|
||||||
// something in between the function entry and tail exit.
|
// something in between the function entry and tail exit.
|
||||||
NumTailCalls = 0;
|
TLD.NumTailCalls = 0;
|
||||||
NumConsecutiveFnEnters = 0;
|
TLD.NumConsecutiveFnEnters = 0;
|
||||||
}
|
}
|
||||||
FuncRecord.RecordKind =
|
FuncRecord.RecordKind =
|
||||||
uint8_t(FunctionRecord::RecordKinds::FunctionTailExit);
|
uint8_t(FunctionRecord::RecordKinds::FunctionTailExit);
|
||||||
|
@ -391,20 +411,21 @@ static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
|
||||||
using AlignedFuncStorage =
|
using AlignedFuncStorage =
|
||||||
std::aligned_storage<sizeof(FunctionRecord),
|
std::aligned_storage<sizeof(FunctionRecord),
|
||||||
alignof(FunctionRecord)>::type;
|
alignof(FunctionRecord)>::type;
|
||||||
RecordPtr -= FunctionRecSize;
|
auto &TLD = getThreadLocalData();
|
||||||
|
TLD.RecordPtr -= FunctionRecSize;
|
||||||
AlignedFuncStorage AlignedFuncRecordBuffer;
|
AlignedFuncStorage AlignedFuncRecordBuffer;
|
||||||
const auto &FuncRecord = *reinterpret_cast<FunctionRecord *>(
|
const auto &FuncRecord = *reinterpret_cast<FunctionRecord *>(
|
||||||
std::memcpy(&AlignedFuncRecordBuffer, RecordPtr, FunctionRecSize));
|
std::memcpy(&AlignedFuncRecordBuffer, TLD.RecordPtr, FunctionRecSize));
|
||||||
assert(FuncRecord.RecordKind ==
|
assert(FuncRecord.RecordKind ==
|
||||||
uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
|
uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
|
||||||
"Expected to find function entry recording when rewinding.");
|
"Expected to find function entry recording when rewinding.");
|
||||||
assert(FuncRecord.FuncId == (FuncId & ~(0x0F << 28)) &&
|
assert(FuncRecord.FuncId == (FuncId & ~(0x0F << 28)) &&
|
||||||
"Expected matching function id when rewinding Exit");
|
"Expected matching function id when rewinding Exit");
|
||||||
--NumConsecutiveFnEnters;
|
--TLD.NumConsecutiveFnEnters;
|
||||||
LastTSC -= FuncRecord.TSCDelta;
|
LastTSC -= FuncRecord.TSCDelta;
|
||||||
|
|
||||||
// We unwound one call. Update the state and return without writing a log.
|
// We unwound one call. Update the state and return without writing a log.
|
||||||
if (NumConsecutiveFnEnters != 0) {
|
if (TLD.NumConsecutiveFnEnters != 0) {
|
||||||
LastFunctionEntryTSC -= FuncRecord.TSCDelta;
|
LastFunctionEntryTSC -= FuncRecord.TSCDelta;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -414,8 +435,8 @@ static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
|
||||||
// exited from via this exit.
|
// exited from via this exit.
|
||||||
LastFunctionEntryTSC = 0;
|
LastFunctionEntryTSC = 0;
|
||||||
auto RewindingTSC = LastTSC;
|
auto RewindingTSC = LastTSC;
|
||||||
auto RewindingRecordPtr = RecordPtr - FunctionRecSize;
|
auto RewindingRecordPtr = TLD.RecordPtr - FunctionRecSize;
|
||||||
while (NumTailCalls > 0) {
|
while (TLD.NumTailCalls > 0) {
|
||||||
AlignedFuncStorage TailExitRecordBuffer;
|
AlignedFuncStorage TailExitRecordBuffer;
|
||||||
// Rewind the TSC back over the TAIL EXIT record.
|
// Rewind the TSC back over the TAIL EXIT record.
|
||||||
const auto &ExpectedTailExit =
|
const auto &ExpectedTailExit =
|
||||||
|
@ -438,24 +459,25 @@ static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
|
||||||
|
|
||||||
// This tail call exceeded the threshold duration. It will not be erased.
|
// This tail call exceeded the threshold duration. It will not be erased.
|
||||||
if ((TSC - RewindingTSC) >= thresholdTicks()) {
|
if ((TSC - RewindingTSC) >= thresholdTicks()) {
|
||||||
NumTailCalls = 0;
|
TLD.NumTailCalls = 0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We can erase a tail exit pair that we're exiting through since
|
// We can erase a tail exit pair that we're exiting through since
|
||||||
// its duration is under threshold.
|
// its duration is under threshold.
|
||||||
--NumTailCalls;
|
--TLD.NumTailCalls;
|
||||||
RewindingRecordPtr -= FunctionRecSize;
|
RewindingRecordPtr -= FunctionRecSize;
|
||||||
RewindingTSC -= ExpectedFunctionEntry.TSCDelta;
|
RewindingTSC -= ExpectedFunctionEntry.TSCDelta;
|
||||||
RecordPtr -= 2 * FunctionRecSize;
|
TLD.RecordPtr -= 2 * FunctionRecSize;
|
||||||
LastTSC = RewindingTSC;
|
LastTSC = RewindingTSC;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool releaseThreadLocalBuffer(BufferQueue &BQArg) {
|
inline bool releaseThreadLocalBuffer(BufferQueue &BQArg) {
|
||||||
auto EC = BQArg.releaseBuffer(Buffer);
|
auto &TLD = getThreadLocalData();
|
||||||
|
auto EC = BQArg.releaseBuffer(TLD.Buffer);
|
||||||
if (EC != BufferQueue::ErrorCode::Ok) {
|
if (EC != BufferQueue::ErrorCode::Ok) {
|
||||||
Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
|
Report("Failed to release buffer at %p; error=%s\n", TLD.Buffer.Buffer,
|
||||||
BufferQueue::getErrorString(EC));
|
BufferQueue::getErrorString(EC));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -465,12 +487,14 @@ inline bool releaseThreadLocalBuffer(BufferQueue &BQArg) {
|
||||||
inline bool prepareBuffer(int (*wall_clock_reader)(clockid_t,
|
inline bool prepareBuffer(int (*wall_clock_reader)(clockid_t,
|
||||||
struct timespec *),
|
struct timespec *),
|
||||||
size_t MaxSize) XRAY_NEVER_INSTRUMENT {
|
size_t MaxSize) XRAY_NEVER_INSTRUMENT {
|
||||||
char *BufferStart = static_cast<char *>(Buffer.Buffer);
|
auto &TLD = getThreadLocalData();
|
||||||
if ((RecordPtr + MaxSize) > (BufferStart + Buffer.Size - MetadataRecSize)) {
|
char *BufferStart = static_cast<char *>(TLD.Buffer.Buffer);
|
||||||
|
if ((TLD.RecordPtr + MaxSize) >
|
||||||
|
(BufferStart + TLD.Buffer.Size - MetadataRecSize)) {
|
||||||
writeEOBMetadata();
|
writeEOBMetadata();
|
||||||
if (!releaseThreadLocalBuffer(**LocalBQ))
|
if (!releaseThreadLocalBuffer(*TLD.LocalBQ))
|
||||||
return false;
|
return false;
|
||||||
auto EC = (*LocalBQ)->getBuffer(Buffer);
|
auto EC = TLD.LocalBQ->getBuffer(TLD.Buffer);
|
||||||
if (EC != BufferQueue::ErrorCode::Ok) {
|
if (EC != BufferQueue::ErrorCode::Ok) {
|
||||||
Report("Failed to acquire a buffer; error=%s\n",
|
Report("Failed to acquire a buffer; error=%s\n",
|
||||||
BufferQueue::getErrorString(EC));
|
BufferQueue::getErrorString(EC));
|
||||||
|
@ -489,14 +513,15 @@ inline bool isLogInitializedAndReady(
|
||||||
// We should take the opportunity to release the buffer though.
|
// We should take the opportunity to release the buffer though.
|
||||||
auto Status = __sanitizer::atomic_load(&LoggingStatus,
|
auto Status = __sanitizer::atomic_load(&LoggingStatus,
|
||||||
__sanitizer::memory_order_acquire);
|
__sanitizer::memory_order_acquire);
|
||||||
|
auto &TLD = getThreadLocalData();
|
||||||
if (Status != XRayLogInitStatus::XRAY_LOG_INITIALIZED) {
|
if (Status != XRayLogInitStatus::XRAY_LOG_INITIALIZED) {
|
||||||
if (RecordPtr != nullptr &&
|
if (TLD.RecordPtr != nullptr &&
|
||||||
(Status == XRayLogInitStatus::XRAY_LOG_FINALIZING ||
|
(Status == XRayLogInitStatus::XRAY_LOG_FINALIZING ||
|
||||||
Status == XRayLogInitStatus::XRAY_LOG_FINALIZED)) {
|
Status == XRayLogInitStatus::XRAY_LOG_FINALIZED)) {
|
||||||
writeEOBMetadata();
|
writeEOBMetadata();
|
||||||
if (!releaseThreadLocalBuffer(*LBQ))
|
if (!releaseThreadLocalBuffer(*LBQ))
|
||||||
return false;
|
return false;
|
||||||
RecordPtr = nullptr;
|
TLD.RecordPtr = nullptr;
|
||||||
LBQ = nullptr;
|
LBQ = nullptr;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -507,11 +532,11 @@ inline bool isLogInitializedAndReady(
|
||||||
writeEOBMetadata();
|
writeEOBMetadata();
|
||||||
if (!releaseThreadLocalBuffer(*LBQ))
|
if (!releaseThreadLocalBuffer(*LBQ))
|
||||||
return false;
|
return false;
|
||||||
RecordPtr = nullptr;
|
TLD.RecordPtr = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Buffer.Buffer == nullptr) {
|
if (TLD.Buffer.Buffer == nullptr) {
|
||||||
auto EC = LBQ->getBuffer(Buffer);
|
auto EC = LBQ->getBuffer(TLD.Buffer);
|
||||||
if (EC != BufferQueue::ErrorCode::Ok) {
|
if (EC != BufferQueue::ErrorCode::Ok) {
|
||||||
auto LS = __sanitizer::atomic_load(&LoggingStatus,
|
auto LS = __sanitizer::atomic_load(&LoggingStatus,
|
||||||
__sanitizer::memory_order_acquire);
|
__sanitizer::memory_order_acquire);
|
||||||
|
@ -525,10 +550,10 @@ inline bool isLogInitializedAndReady(
|
||||||
setupNewBuffer(wall_clock_reader);
|
setupNewBuffer(wall_clock_reader);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (CurrentCPU == std::numeric_limits<uint16_t>::max()) {
|
if (TLD.CurrentCPU == std::numeric_limits<uint16_t>::max()) {
|
||||||
// This means this is the first CPU this thread has ever run on. We set
|
// This means this is the first CPU this thread has ever run on. We set
|
||||||
// the current CPU and record this as the first TSC we've seen.
|
// the current CPU and record this as the first TSC we've seen.
|
||||||
CurrentCPU = CPU;
|
TLD.CurrentCPU = CPU;
|
||||||
writeNewCPUIdMetadata(CPU, TSC);
|
writeNewCPUIdMetadata(CPU, TSC);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -536,12 +561,13 @@ inline bool isLogInitializedAndReady(
|
||||||
} // namespace __xray_fdr_internal
|
} // namespace __xray_fdr_internal
|
||||||
|
|
||||||
inline void endBufferIfFull() XRAY_NEVER_INSTRUMENT {
|
inline void endBufferIfFull() XRAY_NEVER_INSTRUMENT {
|
||||||
auto BufferStart = static_cast<char *>(Buffer.Buffer);
|
auto &TLD = getThreadLocalData();
|
||||||
if ((RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) {
|
auto BufferStart = static_cast<char *>(TLD.Buffer.Buffer);
|
||||||
|
if ((TLD.RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) {
|
||||||
writeEOBMetadata();
|
writeEOBMetadata();
|
||||||
if (!releaseThreadLocalBuffer(**LocalBQ))
|
if (!releaseThreadLocalBuffer(*TLD.LocalBQ))
|
||||||
return;
|
return;
|
||||||
RecordPtr = nullptr;
|
TLD.RecordPtr = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -555,19 +581,21 @@ inline void processFunctionHook(
|
||||||
// don't want to be clobbering potentially partial writes already happening in
|
// don't want to be clobbering potentially partial writes already happening in
|
||||||
// the thread. We use a simple thread_local latch to only allow one on-going
|
// the thread. We use a simple thread_local latch to only allow one on-going
|
||||||
// handleArg0 to happen at any given time.
|
// handleArg0 to happen at any given time.
|
||||||
thread_local bool Running = false;
|
thread_local volatile bool Running = false;
|
||||||
RecursionGuard Guard{Running};
|
RecursionGuard Guard{Running};
|
||||||
if (!Guard) {
|
if (!Guard) {
|
||||||
assert(Running == true && "RecursionGuard is buggy!");
|
assert(Running == true && "RecursionGuard is buggy!");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto &TLD = getThreadLocalData();
|
||||||
|
|
||||||
// In case the reference has been cleaned up before, we make sure we
|
// In case the reference has been cleaned up before, we make sure we
|
||||||
// initialize it to the provided BufferQueue.
|
// initialize it to the provided BufferQueue.
|
||||||
if ((*LocalBQ) == nullptr)
|
if (TLD.LocalBQ == nullptr)
|
||||||
*LocalBQ = BQ;
|
TLD.LocalBQ = BQ;
|
||||||
|
|
||||||
if (!isLogInitializedAndReady(*LocalBQ, TSC, CPU, wall_clock_reader))
|
if (!isLogInitializedAndReady(TLD.LocalBQ, TSC, CPU, wall_clock_reader))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Before we go setting up writing new function entries, we need to be really
|
// Before we go setting up writing new function entries, we need to be really
|
||||||
|
@ -607,14 +635,14 @@ inline void processFunctionHook(
|
||||||
// Buffer, set it up properly before doing any further writing.
|
// Buffer, set it up properly before doing any further writing.
|
||||||
//
|
//
|
||||||
if (!prepareBuffer(wall_clock_reader, FunctionRecSize + MetadataRecSize)) {
|
if (!prepareBuffer(wall_clock_reader, FunctionRecSize + MetadataRecSize)) {
|
||||||
*LocalBQ = nullptr;
|
TLD.LocalBQ = nullptr;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// By this point, we are now ready to write at most 24 bytes (one metadata
|
// By this point, we are now ready to write at most 24 bytes (one metadata
|
||||||
// record and one function record).
|
// record and one function record).
|
||||||
assert((RecordPtr + (MetadataRecSize + FunctionRecSize)) -
|
assert((TLD.RecordPtr + (MetadataRecSize + FunctionRecSize)) -
|
||||||
static_cast<char *>(Buffer.Buffer) >=
|
static_cast<char *>(TLD.Buffer.Buffer) >=
|
||||||
static_cast<ptrdiff_t>(MetadataRecSize) &&
|
static_cast<ptrdiff_t>(MetadataRecSize) &&
|
||||||
"Misconfigured BufferQueue provided; Buffer size not large enough.");
|
"Misconfigured BufferQueue provided; Buffer size not large enough.");
|
||||||
|
|
||||||
|
@ -638,36 +666,36 @@ inline void processFunctionHook(
|
||||||
// the correct TSC delta.
|
// the correct TSC delta.
|
||||||
//
|
//
|
||||||
uint32_t RecordTSCDelta = 0;
|
uint32_t RecordTSCDelta = 0;
|
||||||
if (CPU != CurrentCPU) {
|
if (CPU != TLD.CurrentCPU) {
|
||||||
// We've moved to a new CPU.
|
// We've moved to a new CPU.
|
||||||
writeNewCPUIdMetadata(CPU, TSC);
|
writeNewCPUIdMetadata(CPU, TSC);
|
||||||
} else {
|
} else {
|
||||||
// If the delta is greater than the range for a uint32_t, then we write out
|
// If the delta is greater than the range for a uint32_t, then we write out
|
||||||
// the TSC wrap metadata entry with the full TSC, and the TSC for the
|
// the TSC wrap metadata entry with the full TSC, and the TSC for the
|
||||||
// function record be 0.
|
// function record be 0.
|
||||||
auto Delta = TSC - LastTSC;
|
auto Delta = TSC - TLD.LastTSC;
|
||||||
if (Delta > (1ULL << 32) - 1)
|
if (Delta > (1ULL << 32) - 1)
|
||||||
writeTSCWrapMetadata(TSC);
|
writeTSCWrapMetadata(TSC);
|
||||||
else
|
else
|
||||||
RecordTSCDelta = Delta;
|
RecordTSCDelta = Delta;
|
||||||
}
|
}
|
||||||
|
|
||||||
LastTSC = TSC;
|
TLD.LastTSC = TSC;
|
||||||
CurrentCPU = CPU;
|
TLD.CurrentCPU = CPU;
|
||||||
switch (Entry) {
|
switch (Entry) {
|
||||||
case XRayEntryType::ENTRY:
|
case XRayEntryType::ENTRY:
|
||||||
case XRayEntryType::LOG_ARGS_ENTRY:
|
case XRayEntryType::LOG_ARGS_ENTRY:
|
||||||
// Update the thread local state for the next invocation.
|
// Update the thread local state for the next invocation.
|
||||||
LastFunctionEntryTSC = TSC;
|
TLD.LastFunctionEntryTSC = TSC;
|
||||||
break;
|
break;
|
||||||
case XRayEntryType::TAIL:
|
case XRayEntryType::TAIL:
|
||||||
break;
|
break;
|
||||||
case XRayEntryType::EXIT:
|
case XRayEntryType::EXIT:
|
||||||
// Break out and write the exit record if we can't erase any functions.
|
// Break out and write the exit record if we can't erase any functions.
|
||||||
if (NumConsecutiveFnEnters == 0 ||
|
if (TLD.NumConsecutiveFnEnters == 0 ||
|
||||||
(TSC - LastFunctionEntryTSC) >= thresholdTicks())
|
(TSC - TLD.LastFunctionEntryTSC) >= thresholdTicks())
|
||||||
break;
|
break;
|
||||||
rewindRecentCall(TSC, LastTSC, LastFunctionEntryTSC, FuncId);
|
rewindRecentCall(TSC, TLD.LastTSC, TLD.LastFunctionEntryTSC, FuncId);
|
||||||
return; // without writing log.
|
return; // without writing log.
|
||||||
case XRayEntryType::CUSTOM_EVENT: {
|
case XRayEntryType::CUSTOM_EVENT: {
|
||||||
// This is a bug in patching, so we'll report it once and move on.
|
// This is a bug in patching, so we'll report it once and move on.
|
||||||
|
@ -682,7 +710,7 @@ inline void processFunctionHook(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
writeFunctionRecord(FuncId, RecordTSCDelta, Entry, RecordPtr);
|
writeFunctionRecord(FuncId, RecordTSCDelta, Entry, TLD.RecordPtr);
|
||||||
|
|
||||||
// If we've exhausted the buffer by this time, we then release the buffer to
|
// If we've exhausted the buffer by this time, we then release the buffer to
|
||||||
// make sure that other threads may start using this buffer.
|
// make sure that other threads may start using this buffer.
|
||||||
|
|
Loading…
Reference in New Issue