llvm-project/compiler-rt/lib/xray/xray_fdr_controller.h

370 lines
12 KiB
C
Raw Normal View History

//===-- xray_fdr_controller.h ---------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a function call tracing system.
//
//===----------------------------------------------------------------------===//
#ifndef COMPILER_RT_LIB_XRAY_XRAY_FDR_CONTROLLER_H_
#define COMPILER_RT_LIB_XRAY_XRAY_FDR_CONTROLLER_H_
#include <limits>
#include <time.h>
#include "xray/xray_interface.h"
#include "xray/xray_records.h"
#include "xray_buffer_queue.h"
#include "xray_fdr_log_writer.h"
namespace __xray {
template <size_t Version = 5> class FDRController {
BufferQueue *BQ;
BufferQueue::Buffer &B;
FDRLogWriter &W;
int (*WallClockReader)(clockid_t, struct timespec *) = 0;
uint64_t CycleThreshold = 0;
uint64_t LastFunctionEntryTSC = 0;
uint64_t LatestTSC = 0;
uint16_t LatestCPU = 0;
tid_t TId = 0;
pid_t PId = 0;
bool First = true;
uint32_t UndoableFunctionEnters = 0;
uint32_t UndoableTailExits = 0;
bool finalized() const XRAY_NEVER_INSTRUMENT {
return BQ == nullptr || BQ->finalizing();
}
bool hasSpace(size_t S) XRAY_NEVER_INSTRUMENT {
return B.Data != nullptr && B.Generation == BQ->generation() &&
W.getNextRecord() + S <= reinterpret_cast<char *>(B.Data) + B.Size;
}
constexpr int32_t mask(int32_t FuncId) const XRAY_NEVER_INSTRUMENT {
return FuncId & ((1 << 29) - 1);
}
bool getNewBuffer() XRAY_NEVER_INSTRUMENT {
if (BQ->getBuffer(B) != BufferQueue::ErrorCode::Ok)
return false;
W.resetRecord();
DCHECK_EQ(W.getNextRecord(), B.Data);
LatestTSC = 0;
LatestCPU = 0;
First = true;
UndoableFunctionEnters = 0;
UndoableTailExits = 0;
atomic_store(&B.Extents, 0, memory_order_release);
return true;
}
bool setupNewBuffer() XRAY_NEVER_INSTRUMENT {
if (finalized())
return false;
DCHECK(hasSpace(sizeof(MetadataRecord) * 3));
TId = GetTid();
PId = internal_getpid();
struct timespec TS {
0, 0
};
WallClockReader(CLOCK_MONOTONIC, &TS);
MetadataRecord Metadata[] = {
// Write out a MetadataRecord to signify that this is the start of a new
// buffer, associated with a particular thread, with a new CPU. For the
// data, we have 15 bytes to squeeze as much information as we can. At
// this point we only write down the following bytes:
// - Thread ID (tid_t, cast to 4 bytes type due to Darwin being 8
// bytes)
createMetadataRecord<MetadataRecord::RecordKinds::NewBuffer>(
static_cast<int32_t>(TId)),
// Also write the WalltimeMarker record. We only really need microsecond
// precision here, and enforce across platforms that we need 64-bit
// seconds and 32-bit microseconds encoded in the Metadata record.
createMetadataRecord<MetadataRecord::RecordKinds::WalltimeMarker>(
static_cast<int64_t>(TS.tv_sec),
static_cast<int32_t>(TS.tv_nsec / 1000)),
// Also write the Pid record.
createMetadataRecord<MetadataRecord::RecordKinds::Pid>(
static_cast<int32_t>(PId)),
};
if (finalized())
return false;
return W.writeMetadataRecords(Metadata);
}
bool prepareBuffer(size_t S) XRAY_NEVER_INSTRUMENT {
if (finalized())
return returnBuffer();
if (UNLIKELY(!hasSpace(S))) {
if (!returnBuffer())
return false;
if (!getNewBuffer())
return false;
if (!setupNewBuffer())
return false;
}
if (First) {
First = false;
W.resetRecord();
atomic_store(&B.Extents, 0, memory_order_release);
return setupNewBuffer();
}
return true;
}
bool returnBuffer() XRAY_NEVER_INSTRUMENT {
if (BQ == nullptr)
return false;
First = true;
if (finalized()) {
BQ->releaseBuffer(B); // ignore result.
return false;
}
return BQ->releaseBuffer(B) == BufferQueue::ErrorCode::Ok;
}
enum class PreambleResult { NoChange, WroteMetadata, InvalidBuffer };
PreambleResult recordPreamble(uint64_t TSC,
uint16_t CPU) XRAY_NEVER_INSTRUMENT {
if (UNLIKELY(LatestCPU != CPU || LatestTSC == 0)) {
// We update our internal tracking state for the Latest TSC and CPU we've
// seen, then write out the appropriate metadata and function records.
LatestTSC = TSC;
LatestCPU = CPU;
if (B.Generation != BQ->generation())
return PreambleResult::InvalidBuffer;
W.writeMetadata<MetadataRecord::RecordKinds::NewCPUId>(CPU, TSC);
return PreambleResult::WroteMetadata;
}
DCHECK_EQ(LatestCPU, CPU);
if (UNLIKELY(LatestTSC > TSC ||
TSC - LatestTSC >
uint64_t{std::numeric_limits<int32_t>::max()})) {
// Either the TSC has wrapped around from the last TSC we've seen or the
// delta is too large to fit in a 32-bit signed integer, so we write a
// wrap-around record.
LatestTSC = TSC;
if (B.Generation != BQ->generation())
return PreambleResult::InvalidBuffer;
W.writeMetadata<MetadataRecord::RecordKinds::TSCWrap>(TSC);
return PreambleResult::WroteMetadata;
}
return PreambleResult::NoChange;
}
bool rewindRecords(int32_t FuncId, uint64_t TSC,
uint16_t CPU) XRAY_NEVER_INSTRUMENT {
// Undo one enter record, because at this point we are either at the state
// of:
// - We are exiting a function that we recently entered.
// - We are exiting a function that was the result of a sequence of tail
// exits, and we can check whether the tail exits can be re-wound.
//
FunctionRecord F;
W.undoWrites(sizeof(FunctionRecord));
if (B.Generation != BQ->generation())
return false;
internal_memcpy(&F, W.getNextRecord(), sizeof(FunctionRecord));
DCHECK(F.RecordKind ==
uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
"Expected to find function entry recording when rewinding.");
DCHECK_EQ(F.FuncId, FuncId & ~(0x0F << 28));
LatestTSC -= F.TSCDelta;
if (--UndoableFunctionEnters != 0) {
LastFunctionEntryTSC -= F.TSCDelta;
return true;
}
LastFunctionEntryTSC = 0;
auto RewindingTSC = LatestTSC;
auto RewindingRecordPtr = W.getNextRecord() - sizeof(FunctionRecord);
while (UndoableTailExits) {
if (B.Generation != BQ->generation())
return false;
internal_memcpy(&F, RewindingRecordPtr, sizeof(FunctionRecord));
DCHECK_EQ(F.RecordKind,
uint8_t(FunctionRecord::RecordKinds::FunctionTailExit));
RewindingTSC -= F.TSCDelta;
RewindingRecordPtr -= sizeof(FunctionRecord);
if (B.Generation != BQ->generation())
return false;
internal_memcpy(&F, RewindingRecordPtr, sizeof(FunctionRecord));
// This tail call exceeded the threshold duration. It will not be erased.
if ((TSC - RewindingTSC) >= CycleThreshold) {
UndoableTailExits = 0;
return true;
}
--UndoableTailExits;
W.undoWrites(sizeof(FunctionRecord) * 2);
LatestTSC = RewindingTSC;
}
return true;
}
public:
template <class WallClockFunc>
FDRController(BufferQueue *BQ, BufferQueue::Buffer &B, FDRLogWriter &W,
WallClockFunc R, uint64_t C) XRAY_NEVER_INSTRUMENT
: BQ(BQ),
B(B),
W(W),
WallClockReader(R),
CycleThreshold(C) {}
bool functionEnter(int32_t FuncId, uint64_t TSC,
uint16_t CPU) XRAY_NEVER_INSTRUMENT {
if (finalized() ||
!prepareBuffer(sizeof(MetadataRecord) + sizeof(FunctionRecord)))
return returnBuffer();
auto PreambleStatus = recordPreamble(TSC, CPU);
if (PreambleStatus == PreambleResult::InvalidBuffer)
return returnBuffer();
UndoableFunctionEnters = (PreambleStatus == PreambleResult::WroteMetadata)
? 1
: UndoableFunctionEnters + 1;
auto Delta = TSC - LatestTSC;
LastFunctionEntryTSC = TSC;
LatestTSC = TSC;
return W.writeFunction(FDRLogWriter::FunctionRecordKind::Enter,
mask(FuncId), Delta);
}
bool functionTailExit(int32_t FuncId, uint64_t TSC,
uint16_t CPU) XRAY_NEVER_INSTRUMENT {
if (finalized())
return returnBuffer();
if (!prepareBuffer(sizeof(MetadataRecord) + sizeof(FunctionRecord)))
return returnBuffer();
auto PreambleStatus = recordPreamble(TSC, CPU);
if (PreambleStatus == PreambleResult::InvalidBuffer)
return returnBuffer();
if (PreambleStatus == PreambleResult::NoChange &&
UndoableFunctionEnters != 0 &&
TSC - LastFunctionEntryTSC < CycleThreshold)
return rewindRecords(FuncId, TSC, CPU);
UndoableTailExits = UndoableFunctionEnters ? UndoableTailExits + 1 : 0;
UndoableFunctionEnters = 0;
auto Delta = TSC - LatestTSC;
LatestTSC = TSC;
return W.writeFunction(FDRLogWriter::FunctionRecordKind::TailExit,
mask(FuncId), Delta);
}
bool functionEnterArg(int32_t FuncId, uint64_t TSC, uint16_t CPU,
uint64_t Arg) XRAY_NEVER_INSTRUMENT {
if (finalized() ||
!prepareBuffer((2 * sizeof(MetadataRecord)) + sizeof(FunctionRecord)) ||
recordPreamble(TSC, CPU) == PreambleResult::InvalidBuffer)
return returnBuffer();
auto Delta = TSC - LatestTSC;
LatestTSC = TSC;
LastFunctionEntryTSC = 0;
UndoableFunctionEnters = 0;
UndoableTailExits = 0;
[XRay] Improve FDR trace handling and error messaging Summary: This change covers a number of things spanning LLVM and compiler-rt, which are related in a non-trivial way. In LLVM, we have a library that handles the FDR mode even log loading, which uses C++'s runtime polymorphism feature to better faithfully represent the events that are written down by the FDR mode runtime. We do this by interpreting a trace that's serliased in a common format agreed upon by both the trace loading library and the FDR mode runtime. This library is under active development, which consists of features allowing us to reconstitute a higher-level event log. This event log is used by the conversion and visualisation tools we have for interpreting XRay traces. One of the tools we have is a diagnostic tool in llvm-xray called `fdr-dump` which we've been using to debug our expectations of what the FDR runtime should be writing and what the logical FDR event log structures are. We use this fairly extensively to reason about why some non-trivial traces we're generating with FDR mode runtimes fail to convert or fail to parse correctly. One of these failures we've found in manual debugging of some of the traces we've seen involve an inconsistency between the buffer extents (a record indicating how many bytes to follow are part of a logical thread's event log) and the record of the bytes written into the log -- sometimes it turns out the data could be garbage, due to buffers being recycled, but sometimes we're seeing the buffer extent indicating a log is "shorter" than the actual records associated with the buffer. This case happens particularly with function entry records with a call argument. This change for now updates the FDR mode runtime to write the bytes for the function call and arg record before updating the buffer extents atomically, allowing multiple threads to see a consistent view of the data in the buffer using the atomic counter associated with a buffer. What we're trying to prevent here is partial updates where we see the intermediary updates to the buffer extents (function record size then call argument record size) becoming observable from another thread, for instance, one doing the serialization/flushing. To do both diagnose this issue properly, we need to be able to honour the extents being set in the `BufferExtents` records marking the beginning of the logical buffers when reading an FDR trace. Since LLVM doesn't use C++'s RTTI mechanism, we instead follow the advice in the documentation for LLVM Style RTTI (https://llvm.org/docs/HowToSetUpLLVMStyleRTTI.html). We then rely on this RTTI feature to ensure that our file-based record producer (our streaming "deserializer") can honour the extents of individual buffers as we interpret traces. This also sets us up to be able to eventually do smart skipping/continuation of FDR logs, seeking instead to find BufferExtents records in cases where we find potentially recoverable errors. In the meantime, we make this change to operate in a strict mode when reading logical buffers with extent records. Reviewers: mboerger Subscribers: hiraditya, llvm-commits, jfb Differential Revision: https://reviews.llvm.org/D54201 llvm-svn: 346473
2018-11-09 14:26:48 +08:00
return W.writeFunctionWithArg(FDRLogWriter::FunctionRecordKind::EnterArg,
mask(FuncId), Delta, Arg);
}
bool functionExit(int32_t FuncId, uint64_t TSC,
uint16_t CPU) XRAY_NEVER_INSTRUMENT {
if (finalized() ||
!prepareBuffer(sizeof(MetadataRecord) + sizeof(FunctionRecord)))
return returnBuffer();
auto PreambleStatus = recordPreamble(TSC, CPU);
if (PreambleStatus == PreambleResult::InvalidBuffer)
return returnBuffer();
if (PreambleStatus == PreambleResult::NoChange &&
UndoableFunctionEnters != 0 &&
TSC - LastFunctionEntryTSC < CycleThreshold)
return rewindRecords(FuncId, TSC, CPU);
auto Delta = TSC - LatestTSC;
LatestTSC = TSC;
UndoableFunctionEnters = 0;
UndoableTailExits = 0;
return W.writeFunction(FDRLogWriter::FunctionRecordKind::Exit, mask(FuncId),
Delta);
}
bool customEvent(uint64_t TSC, uint16_t CPU, const void *Event,
int32_t EventSize) XRAY_NEVER_INSTRUMENT {
if (finalized() ||
!prepareBuffer((2 * sizeof(MetadataRecord)) + EventSize) ||
recordPreamble(TSC, CPU) == PreambleResult::InvalidBuffer)
return returnBuffer();
auto Delta = TSC - LatestTSC;
LatestTSC = TSC;
UndoableFunctionEnters = 0;
UndoableTailExits = 0;
return W.writeCustomEvent(Delta, Event, EventSize);
}
bool typedEvent(uint64_t TSC, uint16_t CPU, uint16_t EventType,
const void *Event, int32_t EventSize) XRAY_NEVER_INSTRUMENT {
if (finalized() ||
!prepareBuffer((2 * sizeof(MetadataRecord)) + EventSize) ||
recordPreamble(TSC, CPU) == PreambleResult::InvalidBuffer)
return returnBuffer();
auto Delta = TSC - LatestTSC;
LatestTSC = TSC;
UndoableFunctionEnters = 0;
UndoableTailExits = 0;
return W.writeTypedEvent(Delta, EventType, Event, EventSize);
}
bool flush() XRAY_NEVER_INSTRUMENT {
if (finalized()) {
returnBuffer(); // ignore result.
return true;
}
return returnBuffer();
}
};
} // namespace __xray
#endif // COMPILER-RT_LIB_XRAY_XRAY_FDR_CONTROLLER_H_