forked from OSchip/llvm-project
[XRay] [compiler-rt] Refactor logic for xray fdr logging. NFC.
Summary: Separated the IO and the thread local storage state machine of logging from the writing of log records once the contents are deterministic. Finer granularity functions are provided as inline functions in the same header such that stack does not grow due to the functions being separated. An executable utility xray_fdr_log_printer is also implemented to use the finest granularity functions to produce binary test data in the FDR format with a relatively convenient text input. For example, one can take a file with textual contents layed out in rows and feed it to the binary to generate data that llvm-xray convert can then read. This is a convenient way to build a test suite for llvm-xray convert to ensure it's robust to the fdr format. Example: $cat myFile.txt NewBuffer : { time = 2 , Tid=5} NewCPU : { CPU =1 , TSC = 123} Function : { FuncId = 5, TSCDelta = 3, EntryType = Entry } Function : { FuncId = 5, TSCDelta = 5, EntryType = Exit} TSCWrap : { TSC = 678 } Function : { FuncId = 6, TSCDelta = 0, EntryType = Entry } Function : { FuncId = 6, TSCDelta = 50, EntryType = Exit } EOB : { } $cat myFile.txt | ./bin/xray_fdr_log_printer > /tmp/binarydata.bin $./bin/llvm-xray convert -output-format=yaml -output=- /tmp/binarydata.bin yaml format comes out as expected. Reviewers: dberris, pelikan Reviewed By: dberris Subscribers: llvm-commits, mgorny Differential Revision: https://reviews.llvm.org/D30850 llvm-svn: 297801
This commit is contained in:
parent
66443d80f1
commit
c6ee33852b
|
@ -2,3 +2,5 @@ add_xray_unittest(XRayBufferQueueTest SOURCES
|
|||
buffer_queue_test.cc xray_unit_test_main.cc)
|
||||
add_xray_unittest(XRayFDRLoggingTest SOURCES
|
||||
fdr_logging_test.cc xray_unit_test_main.cc)
|
||||
|
||||
add_executable(xray_fdr_log_printer xray_fdr_log_printer_tool.cc)
|
||||
|
|
|
@ -0,0 +1,329 @@
|
|||
//===-- xray_fdr_log_printer_tool.cc --------------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file is a part of XRay, a function call tracing system.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "xray_fdr_logging.h"
|
||||
#include "xray_fdr_logging_impl.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
#include "xray/xray_records.h"
|
||||
|
||||
// Writes out xray fdr mode log records to stdout based on a sequence of
|
||||
// formatted data read from stdin.
|
||||
//
|
||||
// Interprets an adhoc format of Top Level Types and parameter maps of the form:
|
||||
//
|
||||
// RecordType : { Parameter1 = Value, Parameter2 = value , Parameter3 = value}
|
||||
// OtherRecordType : { ParameterOne = Value }
|
||||
//
|
||||
// Each line corresponds to a type of record written by the Xray Flight Data
|
||||
// Recorder mode to a buffer. This program synthesizes records in the FDR binary
|
||||
// format and writes them to std::cout.
|
||||
|
||||
namespace {
|
||||
|
||||
/// A crude lexer to read tokens and skip over whitespace.
|
||||
class TokenReader {
|
||||
public:
|
||||
TokenReader() : LastDelimPresent(false), FoundEof(false), LastDelim(0) {}
|
||||
std::string readToken(std::istream &Stream);
|
||||
bool hasLastDelim() const { return LastDelimPresent; }
|
||||
char getLastDelim() const { return LastDelim; }
|
||||
void setLastDelim(char Delim) {
|
||||
LastDelimPresent = true;
|
||||
LastDelim = Delim;
|
||||
}
|
||||
void clearLastDelim() {
|
||||
LastDelimPresent = false;
|
||||
LastDelim = 0;
|
||||
}
|
||||
bool isEof() { return FoundEof; }
|
||||
void setFoundEof(bool eof) { FoundEof = eof; }
|
||||
|
||||
private:
|
||||
bool LastDelimPresent;
|
||||
bool FoundEof;
|
||||
char LastDelim;
|
||||
};
|
||||
|
||||
// Globally tracks whether we reached EOF and caches delimiters that qualify as
|
||||
// tokens.
|
||||
static TokenReader TokenReader{};
|
||||
|
||||
bool isWhitespace(char c) {
|
||||
// Hardcode the whitespace characters we will not return as tokens even though
|
||||
// they are token delimiters.
|
||||
static const std::vector<char> Whitespace{' ', '\n', '\t'};
|
||||
return std::find(Whitespace.begin(), Whitespace.end(), c) != Whitespace.end();
|
||||
}
|
||||
|
||||
bool isDelimiter(char c) {
|
||||
// Hardcode a set of token delimiters.
|
||||
static const std::vector<char> Delimiters{' ', ':', ',', '\n',
|
||||
'\t', '{', '}', '='};
|
||||
return std::find(Delimiters.begin(), Delimiters.end(), c) != Delimiters.end();
|
||||
}
|
||||
|
||||
std::string TokenReader::readToken(std::istream &Stream) {
|
||||
// If on the last call we read a trailing delimiter that also qualifies as a
|
||||
// token, return it now.
|
||||
if (hasLastDelim()) {
|
||||
char Token = getLastDelim();
|
||||
clearLastDelim();
|
||||
return std::string{Token};
|
||||
}
|
||||
|
||||
std::stringstream Builder{};
|
||||
char c;
|
||||
c = Stream.get();
|
||||
while (!isDelimiter(c) && !Stream.eof()) {
|
||||
Builder << c;
|
||||
c = Stream.get();
|
||||
}
|
||||
|
||||
setFoundEof(Stream.eof());
|
||||
|
||||
std::string Token = Builder.str();
|
||||
|
||||
if (Token.empty()) {
|
||||
// We read a whitespace delimiter only. Skip over it.
|
||||
if (!isEof() && isWhitespace(c)) {
|
||||
return readToken(Stream);
|
||||
} else if (isWhitespace(c)) {
|
||||
// We only read a whitespace delimiter.
|
||||
return "";
|
||||
} else {
|
||||
// We read a delimiter that matters as a token.
|
||||
return std::string{c};
|
||||
}
|
||||
}
|
||||
|
||||
// If we found a delimiter that's a valid token. Store it to return as the
|
||||
// next token.
|
||||
if (!isWhitespace(c))
|
||||
setLastDelim(c);
|
||||
|
||||
return Token;
|
||||
}
|
||||
|
||||
// Reads an expected token or dies a gruesome death.
|
||||
void eatExpectedToken(std::istream &Stream, const std::string &Expected) {
|
||||
std::string Token = TokenReader.readToken(Stream);
|
||||
if (Token.compare(Expected) != 0) {
|
||||
std::cerr << "Expecting token '" << Expected << "'. Found '" << Token
|
||||
<< "'.\n";
|
||||
std::exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Constructs a map of key value pairs from a token stream.
|
||||
// Expects to read an expression of the form:
|
||||
//
|
||||
// { a = b, c = d, e = f}
|
||||
//
|
||||
// If not, the driver will crash.
|
||||
std::map<std::string, std::string> readMap(std::istream &Stream) {
|
||||
using StrMap = std::map<std::string, std::string>;
|
||||
using StrVector = std::vector<std::string>;
|
||||
|
||||
eatExpectedToken(Stream, "{");
|
||||
StrVector TokenList{};
|
||||
|
||||
while (!TokenReader.isEof()) {
|
||||
std::string CurrentToken = TokenReader.readToken(Stream);
|
||||
if (CurrentToken.compare("}") == 0) {
|
||||
break;
|
||||
}
|
||||
TokenList.push_back(CurrentToken);
|
||||
if (TokenReader.isEof()) {
|
||||
std::cerr << "Got EOF while building a param map.\n";
|
||||
std::exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
if (TokenList.size() == 0) {
|
||||
StrMap EmptyMap{};
|
||||
return EmptyMap;
|
||||
}
|
||||
if (TokenList.size() % 4 != 3) {
|
||||
std::cerr << "Error while building token map. Expected triples of tokens "
|
||||
"in the form 'a = b' separated by commas.\n";
|
||||
std::exit(1);
|
||||
}
|
||||
|
||||
StrMap TokenMap{};
|
||||
std::size_t ElementIndex = 0;
|
||||
for (; ElementIndex < TokenList.size(); ElementIndex += 4) {
|
||||
if (TokenList[ElementIndex + 1].compare("=") != 0) {
|
||||
std::cerr << "Expected an assignment when building a param map.\n";
|
||||
std::exit(1);
|
||||
}
|
||||
TokenMap[TokenList[ElementIndex]] = TokenList[ElementIndex + 2];
|
||||
if (ElementIndex + 3 < TokenList.size()) {
|
||||
if (TokenList[ElementIndex + 3].compare(",") != 0) {
|
||||
std::cerr << "Expected assignment statements to be separated by commas."
|
||||
<< "\n";
|
||||
std::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
return TokenMap;
|
||||
}
|
||||
|
||||
std::string getOrDie(const std::map<std::string, std::string> &Lookup,
|
||||
const std::string &Key) {
|
||||
auto MapIter = Lookup.find(Key);
|
||||
if (MapIter == Lookup.end()) {
|
||||
std::cerr << "Expected key '" << Key << "'. Was not found.\n";
|
||||
std::exit(1);
|
||||
}
|
||||
return MapIter->second;
|
||||
}
|
||||
|
||||
// Reads a numeric type from a string token through the magic of
|
||||
// std::stringstream.
|
||||
template <typename NT> struct NumberParser {
|
||||
static NT parse(const std::string &Input) {
|
||||
NT Number = 0;
|
||||
std::stringstream Stream(Input);
|
||||
Stream >> Number;
|
||||
return Number;
|
||||
}
|
||||
};
|
||||
|
||||
void writeNewBufferOrDie(std::istream &Input) {
|
||||
auto TokenMap = readMap(Input);
|
||||
pid_t Tid = NumberParser<pid_t>::parse(getOrDie(TokenMap, "Tid"));
|
||||
time_t Time = NumberParser<time_t>::parse(getOrDie(TokenMap, "time"));
|
||||
timespec TimeSpec = {Time, 0};
|
||||
constexpr const size_t OutputSize = 32;
|
||||
std::array<char, OutputSize> Buffer{};
|
||||
char *MemPtr = Buffer.data();
|
||||
__xray::__xray_fdr_internal::writeNewBufferPreamble(Tid, TimeSpec, MemPtr);
|
||||
std::cout.write(Buffer.data(), OutputSize);
|
||||
}
|
||||
|
||||
void writeNewCPUIdOrDie(std::istream &Input) {
|
||||
auto TokenMap = readMap(Input);
|
||||
uint16_t CPU = NumberParser<uint16_t>::parse(getOrDie(TokenMap, "CPU"));
|
||||
uint64_t TSC = NumberParser<uint64_t>::parse(getOrDie(TokenMap, "TSC"));
|
||||
constexpr const size_t OutputSize = 16;
|
||||
std::array<char, OutputSize> Buffer{};
|
||||
char *MemPtr = Buffer.data();
|
||||
__xray::__xray_fdr_internal::writeNewCPUIdMetadata(CPU, TSC, MemPtr);
|
||||
std::cout.write(Buffer.data(), OutputSize);
|
||||
}
|
||||
|
||||
void writeEOBOrDie(std::istream &Input) {
|
||||
auto TokenMap = readMap(Input);
|
||||
constexpr const size_t OutputSize = 16;
|
||||
std::array<char, OutputSize> Buffer{};
|
||||
char *MemPtr = Buffer.data();
|
||||
__xray::__xray_fdr_internal::writeEOBMetadata(MemPtr);
|
||||
std::cout.write(Buffer.data(), OutputSize);
|
||||
}
|
||||
|
||||
void writeTSCWrapOrDie(std::istream &Input) {
|
||||
auto TokenMap = readMap(Input);
|
||||
uint64_t TSC = NumberParser<uint64_t>::parse(getOrDie(TokenMap, "TSC"));
|
||||
constexpr const size_t OutputSize = 16;
|
||||
std::array<char, OutputSize> Buffer{};
|
||||
char *MemPtr = Buffer.data();
|
||||
__xray::__xray_fdr_internal::writeTSCWrapMetadata(TSC, MemPtr);
|
||||
std::cout.write(Buffer.data(), OutputSize);
|
||||
}
|
||||
|
||||
XRayEntryType decodeEntryType(const std::string &EntryTypeStr) {
|
||||
if (EntryTypeStr.compare("Entry") == 0) {
|
||||
return XRayEntryType::ENTRY;
|
||||
} else if (EntryTypeStr.compare("LogArgsEntry") == 0) {
|
||||
return XRayEntryType::LOG_ARGS_ENTRY;
|
||||
} else if (EntryTypeStr.compare("Exit") == 0) {
|
||||
return XRayEntryType::EXIT;
|
||||
} else if (EntryTypeStr.compare("Tail") == 0) {
|
||||
return XRayEntryType::TAIL;
|
||||
}
|
||||
std::cerr << "Illegal entry type " << EntryTypeStr << ".\n";
|
||||
std::exit(1);
|
||||
}
|
||||
|
||||
void writeFunctionOrDie(std::istream &Input) {
|
||||
auto TokenMap = readMap(std::cin);
|
||||
int FuncId = NumberParser<int>::parse(getOrDie(TokenMap, "FuncId"));
|
||||
uint32_t TSCDelta =
|
||||
NumberParser<uint32_t>::parse(getOrDie(TokenMap, "TSCDelta"));
|
||||
std::string EntryType = getOrDie(TokenMap, "EntryType");
|
||||
XRayEntryType XrayEntryType = decodeEntryType(EntryType);
|
||||
constexpr const size_t OutputSize = 8;
|
||||
std::array<char, OutputSize> Buffer{};
|
||||
char *MemPtr = Buffer.data();
|
||||
__xray::__xray_fdr_internal::writeFunctionRecord(FuncId, TSCDelta,
|
||||
XrayEntryType, MemPtr);
|
||||
std::cout.write(Buffer.data(), OutputSize);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
std::map<std::string, std::function<void(std::istream &)>> TopLevelRecordMap;
|
||||
TopLevelRecordMap["NewBuffer"] = writeNewBufferOrDie;
|
||||
TopLevelRecordMap["NewCPU"] = writeNewCPUIdOrDie;
|
||||
TopLevelRecordMap["EOB"] = writeEOBOrDie;
|
||||
TopLevelRecordMap["TSCWrap"] = writeTSCWrapOrDie;
|
||||
TopLevelRecordMap["Function"] = writeFunctionOrDie;
|
||||
|
||||
// Write file header
|
||||
//
|
||||
// (2) uint16 : version
|
||||
// (2) uint16 : type
|
||||
// (4) uint32 : bitfield
|
||||
// (8) uint64 : cycle frequency
|
||||
// (16) - : padding
|
||||
uint16_t HeaderVersion = 1;
|
||||
uint16_t HeaderType = 1;
|
||||
uint32_t Bitfield = 3;
|
||||
uint64_t CycleFreq = 42;
|
||||
constexpr const size_t HeaderSize = 32;
|
||||
std::array<char, HeaderSize> Header{};
|
||||
std::memcpy(Header.data(), &HeaderVersion, sizeof(HeaderVersion));
|
||||
std::memcpy(Header.data() + 2, &HeaderType, sizeof(HeaderType));
|
||||
std::memcpy(Header.data() + 4, &Bitfield, sizeof(Bitfield));
|
||||
std::memcpy(Header.data() + 8, &CycleFreq, sizeof(CycleFreq));
|
||||
std::cout.write(Header.data(), HeaderSize);
|
||||
|
||||
std::string CurrentToken;
|
||||
while (true) {
|
||||
CurrentToken = TokenReader.readToken(std::cin);
|
||||
if (TokenReader.isEof())
|
||||
break;
|
||||
auto MapIter = TopLevelRecordMap.find(CurrentToken);
|
||||
if (MapIter != TopLevelRecordMap.end()) {
|
||||
eatExpectedToken(std::cin, ":");
|
||||
if (TokenReader.isEof()) {
|
||||
std::cerr << "Got eof when expecting to read a map.\n";
|
||||
std::exit(1);
|
||||
}
|
||||
MapIter->second(std::cin);
|
||||
} else {
|
||||
std::cerr << "Got bad top level instruction '" << CurrentToken << "'.\n";
|
||||
std::exit(1);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
//===-- xray_fdr_log_records.h -------------------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file is a part of XRay, a function call tracing system.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef XRAY_XRAY_FDR_LOG_RECORDS_H
|
||||
#define XRAY_XRAY_FDR_LOG_RECORDS_H
|
||||
|
||||
enum class RecordType : uint8_t { Function, Metadata };
|
||||
|
||||
// A MetadataRecord encodes the kind of record in its first byte, and have 15
|
||||
// additional bytes in the end to hold free-form data.
|
||||
struct alignas(16) MetadataRecord {
|
||||
// A MetadataRecord must always have a type of 1.
|
||||
/* RecordType */ uint8_t Type : 1;
|
||||
|
||||
// Each kind of record is represented as a 7-bit value (even though we use an
|
||||
// unsigned 8-bit enum class to do so).
|
||||
enum class RecordKinds : uint8_t {
|
||||
NewBuffer,
|
||||
EndOfBuffer,
|
||||
NewCPUId,
|
||||
TSCWrap,
|
||||
WalltimeMarker,
|
||||
};
|
||||
// Use 7 bits to identify this record type.
|
||||
/* RecordKinds */ uint8_t RecordKind : 7;
|
||||
char Data[15];
|
||||
} __attribute__((packed));
|
||||
|
||||
static_assert(sizeof(MetadataRecord) == 16, "Wrong size for MetadataRecord.");
|
||||
|
||||
struct alignas(8) FunctionRecord {
|
||||
// A FunctionRecord must always have a type of 0.
|
||||
/* RecordType */ uint8_t Type : 1;
|
||||
enum class RecordKinds {
|
||||
FunctionEnter = 0x00,
|
||||
FunctionExit = 0x01,
|
||||
FunctionTailExit = 0x02,
|
||||
};
|
||||
/* RecordKinds */ uint8_t RecordKind : 3;
|
||||
|
||||
// We only use 28 bits of the function ID, so that we can use as few bytes as
|
||||
// possible. This means we only support 2^28 (268,435,456) unique function ids
|
||||
// in a single binary.
|
||||
int FuncId : 28;
|
||||
|
||||
// We use another 4 bytes to hold the delta between the previous entry's TSC.
|
||||
// In case we've found that the distance is greater than the allowable 32 bits
|
||||
// (either because we are running in a different CPU and the TSC might be
|
||||
// different then), we should use a MetadataRecord before this FunctionRecord
|
||||
// that will contain the full TSC for that CPU, and keep this to 0.
|
||||
uint32_t TSCDelta;
|
||||
} __attribute__((packed));
|
||||
|
||||
static_assert(sizeof(FunctionRecord) == 8, "Wrong size for FunctionRecord.");
|
||||
|
||||
#endif // XRAY_XRAY_FDR_LOG_RECORDS_H
|
|
@ -7,7 +7,7 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file is a part of XRay, a dynamic runtime instruementation system.
|
||||
// This file is a part of XRay, a dynamic runtime instrumentation system.
|
||||
//
|
||||
// Here we implement the Flight Data Recorder mode for XRay, where we use
|
||||
// compact structures to store records in memory as well as when writing out the
|
||||
|
@ -31,6 +31,7 @@
|
|||
#include "xray/xray_records.h"
|
||||
#include "xray_buffer_queue.h"
|
||||
#include "xray_defs.h"
|
||||
#include "xray_fdr_logging_impl.h"
|
||||
#include "xray_flags.h"
|
||||
#include "xray_tsc.h"
|
||||
#include "xray_utils.h"
|
||||
|
@ -49,8 +50,8 @@ std::atomic<XRayLogFlushStatus> LogFlushStatus{
|
|||
std::unique_ptr<FDRLoggingOptions> FDROptions;
|
||||
|
||||
XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax,
|
||||
void *Options,
|
||||
size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
|
||||
void *Options,
|
||||
size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
|
||||
assert(OptionsSize == sizeof(FDRLoggingOptions));
|
||||
XRayLogInitStatus CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
|
||||
if (!LoggingStatus.compare_exchange_strong(
|
||||
|
@ -174,352 +175,16 @@ XRayLogInitStatus fdrLoggingReset() XRAY_NEVER_INSTRUMENT {
|
|||
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
|
||||
}
|
||||
|
||||
namespace {
|
||||
thread_local BufferQueue::Buffer Buffer;
|
||||
thread_local char *RecordPtr = nullptr;
|
||||
|
||||
void setupNewBuffer(const BufferQueue::Buffer &Buffer) XRAY_NEVER_INSTRUMENT {
|
||||
RecordPtr = static_cast<char *>(Buffer.Buffer);
|
||||
|
||||
static constexpr int InitRecordsCount = 2;
|
||||
std::aligned_storage<sizeof(MetadataRecord)>::type Records[InitRecordsCount];
|
||||
{
|
||||
// Write out a MetadataRecord to signify that this is the start of a new
|
||||
// buffer, associated with a particular thread, with a new CPU. For the
|
||||
// data, we have 15 bytes to squeeze as much information as we can. At this
|
||||
// point we only write down the following bytes:
|
||||
// - Thread ID (pid_t, 4 bytes)
|
||||
auto &NewBuffer = *reinterpret_cast<MetadataRecord *>(&Records[0]);
|
||||
NewBuffer.Type = uint8_t(RecordType::Metadata);
|
||||
NewBuffer.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewBuffer);
|
||||
pid_t Tid = syscall(SYS_gettid);
|
||||
std::memcpy(&NewBuffer.Data, &Tid, sizeof(pid_t));
|
||||
}
|
||||
|
||||
// Also write the WalltimeMarker record.
|
||||
{
|
||||
static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes");
|
||||
auto &WalltimeMarker = *reinterpret_cast<MetadataRecord *>(&Records[1]);
|
||||
WalltimeMarker.Type = uint8_t(RecordType::Metadata);
|
||||
WalltimeMarker.RecordKind =
|
||||
uint8_t(MetadataRecord::RecordKinds::WalltimeMarker);
|
||||
timespec TS{0, 0};
|
||||
clock_gettime(CLOCK_MONOTONIC, &TS);
|
||||
|
||||
// We only really need microsecond precision here, and enforce across
|
||||
// platforms that we need 64-bit seconds and 32-bit microseconds encoded in
|
||||
// the Metadata record.
|
||||
int32_t Micros = TS.tv_nsec / 1000;
|
||||
int64_t Seconds = TS.tv_sec;
|
||||
std::memcpy(WalltimeMarker.Data, &Seconds, sizeof(Seconds));
|
||||
std::memcpy(WalltimeMarker.Data + sizeof(Seconds), &Micros, sizeof(Micros));
|
||||
}
|
||||
std::memcpy(RecordPtr, Records, sizeof(MetadataRecord) * InitRecordsCount);
|
||||
RecordPtr += sizeof(MetadataRecord) * InitRecordsCount;
|
||||
}
|
||||
|
||||
void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC) XRAY_NEVER_INSTRUMENT {
|
||||
MetadataRecord NewCPUId;
|
||||
NewCPUId.Type = uint8_t(RecordType::Metadata);
|
||||
NewCPUId.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewCPUId);
|
||||
|
||||
// The data for the New CPU will contain the following bytes:
|
||||
// - CPU ID (uint16_t, 2 bytes)
|
||||
// - Full TSC (uint64_t, 8 bytes)
|
||||
// Total = 12 bytes.
|
||||
std::memcpy(&NewCPUId.Data, &CPU, sizeof(CPU));
|
||||
std::memcpy(&NewCPUId.Data[sizeof(CPU)], &TSC, sizeof(TSC));
|
||||
std::memcpy(RecordPtr, &NewCPUId, sizeof(MetadataRecord));
|
||||
RecordPtr += sizeof(MetadataRecord);
|
||||
}
|
||||
|
||||
void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {
|
||||
MetadataRecord EOBMeta;
|
||||
EOBMeta.Type = uint8_t(RecordType::Metadata);
|
||||
EOBMeta.RecordKind = uint8_t(MetadataRecord::RecordKinds::EndOfBuffer);
|
||||
// For now we don't write any bytes into the Data field.
|
||||
std::memcpy(RecordPtr, &EOBMeta, sizeof(MetadataRecord));
|
||||
RecordPtr += sizeof(MetadataRecord);
|
||||
}
|
||||
|
||||
void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
|
||||
MetadataRecord TSCWrap;
|
||||
TSCWrap.Type = uint8_t(RecordType::Metadata);
|
||||
TSCWrap.RecordKind = uint8_t(MetadataRecord::RecordKinds::TSCWrap);
|
||||
|
||||
// The data for the TSCWrap record contains the following bytes:
|
||||
// - Full TSC (uint64_t, 8 bytes)
|
||||
// Total = 8 bytes.
|
||||
std::memcpy(&TSCWrap.Data, &TSC, sizeof(TSC));
|
||||
std::memcpy(RecordPtr, &TSCWrap, sizeof(MetadataRecord));
|
||||
RecordPtr += sizeof(MetadataRecord);
|
||||
}
|
||||
|
||||
constexpr auto MetadataRecSize = sizeof(MetadataRecord);
|
||||
constexpr auto FunctionRecSize = sizeof(FunctionRecord);
|
||||
|
||||
class ThreadExitBufferCleanup {
|
||||
std::weak_ptr<BufferQueue> Buffers;
|
||||
BufferQueue::Buffer &Buffer;
|
||||
|
||||
public:
|
||||
explicit ThreadExitBufferCleanup(std::weak_ptr<BufferQueue> BQ,
|
||||
BufferQueue::Buffer &Buffer)
|
||||
XRAY_NEVER_INSTRUMENT : Buffers(BQ),
|
||||
Buffer(Buffer) {}
|
||||
|
||||
~ThreadExitBufferCleanup() noexcept XRAY_NEVER_INSTRUMENT {
|
||||
if (RecordPtr == nullptr)
|
||||
return;
|
||||
|
||||
// We make sure that upon exit, a thread will write out the EOB
|
||||
// MetadataRecord in the thread-local log, and also release the buffer to
|
||||
// the queue.
|
||||
assert((RecordPtr + MetadataRecSize) - static_cast<char *>(Buffer.Buffer) >=
|
||||
static_cast<ptrdiff_t>(MetadataRecSize));
|
||||
if (auto BQ = Buffers.lock()) {
|
||||
writeEOBMetadata();
|
||||
if (auto EC = BQ->releaseBuffer(Buffer))
|
||||
Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
|
||||
EC.message().c_str());
|
||||
return;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class RecursionGuard {
|
||||
bool &Running;
|
||||
const bool Valid;
|
||||
|
||||
public:
|
||||
explicit RecursionGuard(bool &R) : Running(R), Valid(!R) {
|
||||
if (Valid)
|
||||
Running = true;
|
||||
}
|
||||
|
||||
RecursionGuard(const RecursionGuard &) = delete;
|
||||
RecursionGuard(RecursionGuard &&) = delete;
|
||||
RecursionGuard &operator=(const RecursionGuard &) = delete;
|
||||
RecursionGuard &operator=(RecursionGuard &&) = delete;
|
||||
|
||||
explicit operator bool() const { return Valid; }
|
||||
|
||||
~RecursionGuard() noexcept {
|
||||
if (Valid)
|
||||
Running = false;
|
||||
}
|
||||
};
|
||||
|
||||
inline bool loggingInitialized() {
|
||||
return LoggingStatus.load(std::memory_order_acquire) ==
|
||||
XRayLogInitStatus::XRAY_LOG_INITIALIZED;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void fdrLoggingHandleArg0(int32_t FuncId,
|
||||
XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
|
||||
XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
|
||||
// We want to get the TSC as early as possible, so that we can check whether
|
||||
// we've seen this CPU before. We also do it before we load anything else, to
|
||||
// allow for forward progress with the scheduling.
|
||||
unsigned char CPU;
|
||||
uint64_t TSC = __xray::readTSC(CPU);
|
||||
|
||||
// Bail out right away if logging is not initialized yet.
|
||||
if (LoggingStatus.load(std::memory_order_acquire) !=
|
||||
XRayLogInitStatus::XRAY_LOG_INITIALIZED)
|
||||
return;
|
||||
|
||||
// We use a thread_local variable to keep track of which CPUs we've already
|
||||
// run, and the TSC times for these CPUs. This allows us to stop repeating the
|
||||
// CPU field in the function records.
|
||||
//
|
||||
// We assume that we'll support only 65536 CPUs for x86_64.
|
||||
thread_local uint16_t CurrentCPU = std::numeric_limits<uint16_t>::max();
|
||||
thread_local uint64_t LastTSC = 0;
|
||||
|
||||
// Make sure a thread that's ever called handleArg0 has a thread-local
|
||||
// live reference to the buffer queue for this particular instance of
|
||||
// FDRLogging, and that we're going to clean it up when the thread exits.
|
||||
thread_local auto LocalBQ = BQ;
|
||||
thread_local ThreadExitBufferCleanup Cleanup(LocalBQ, Buffer);
|
||||
|
||||
// Prevent signal handler recursion, so in case we're already in a log writing
|
||||
// mode and the signal handler comes in (and is also instrumented) then we
|
||||
// don't want to be clobbering potentially partial writes already happening in
|
||||
// the thread. We use a simple thread_local latch to only allow one on-going
|
||||
// handleArg0 to happen at any given time.
|
||||
thread_local bool Running = false;
|
||||
RecursionGuard Guard{Running};
|
||||
if (!Guard) {
|
||||
assert(Running == true && "RecursionGuard is buggy!");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!loggingInitialized() || LocalBQ->finalizing()) {
|
||||
writeEOBMetadata();
|
||||
if (auto EC = BQ->releaseBuffer(Buffer)) {
|
||||
Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
|
||||
EC.message().c_str());
|
||||
return;
|
||||
}
|
||||
RecordPtr = nullptr;
|
||||
}
|
||||
|
||||
if (Buffer.Buffer == nullptr) {
|
||||
if (auto EC = LocalBQ->getBuffer(Buffer)) {
|
||||
auto LS = LoggingStatus.load(std::memory_order_acquire);
|
||||
if (LS != XRayLogInitStatus::XRAY_LOG_FINALIZING &&
|
||||
LS != XRayLogInitStatus::XRAY_LOG_FINALIZED)
|
||||
Report("Failed to acquire a buffer; error=%s\n", EC.message().c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
setupNewBuffer(Buffer);
|
||||
}
|
||||
|
||||
if (CurrentCPU == std::numeric_limits<uint16_t>::max()) {
|
||||
// This means this is the first CPU this thread has ever run on. We set the
|
||||
// current CPU and record this as the first TSC we've seen.
|
||||
CurrentCPU = CPU;
|
||||
writeNewCPUIdMetadata(CPU, TSC);
|
||||
}
|
||||
|
||||
// Before we go setting up writing new function entries, we need to be really
|
||||
// careful about the pointer math we're doing. This means we need to ensure
|
||||
// that the record we are about to write is going to fit into the buffer,
|
||||
// without overflowing the buffer.
|
||||
//
|
||||
// To do this properly, we use the following assumptions:
|
||||
//
|
||||
// - The least number of bytes we will ever write is 8
|
||||
// (sizeof(FunctionRecord)) only if the delta between the previous entry
|
||||
// and this entry is within 32 bits.
|
||||
// - The most number of bytes we will ever write is 8 + 16 = 24. This is
|
||||
// computed by:
|
||||
//
|
||||
// sizeof(FunctionRecord) + sizeof(MetadataRecord)
|
||||
//
|
||||
// These arise in the following cases:
|
||||
//
|
||||
// 1. When the delta between the TSC we get and the previous TSC for the
|
||||
// same CPU is outside of the uint32_t range, we end up having to
|
||||
// write a MetadataRecord to indicate a "tsc wrap" before the actual
|
||||
// FunctionRecord.
|
||||
// 2. When we learn that we've moved CPUs, we need to write a
|
||||
// MetadataRecord to indicate a "cpu change", and thus write out the
|
||||
// current TSC for that CPU before writing out the actual
|
||||
// FunctionRecord.
|
||||
// 3. When we learn about a new CPU ID, we need to write down a "new cpu
|
||||
// id" MetadataRecord before writing out the actual FunctionRecord.
|
||||
//
|
||||
// - An End-of-Buffer (EOB) MetadataRecord is 16 bytes.
|
||||
//
|
||||
// So the math we need to do is to determine whether writing 24 bytes past the
|
||||
// current pointer leaves us with enough bytes to write the EOB
|
||||
// MetadataRecord. If we don't have enough space after writing as much as 24
|
||||
// bytes in the end of the buffer, we need to write out the EOB, get a new
|
||||
// Buffer, set it up properly before doing any further writing.
|
||||
//
|
||||
char *BufferStart = static_cast<char *>(Buffer.Buffer);
|
||||
if ((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart <
|
||||
static_cast<ptrdiff_t>(MetadataRecSize)) {
|
||||
writeEOBMetadata();
|
||||
if (auto EC = LocalBQ->releaseBuffer(Buffer)) {
|
||||
Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
|
||||
EC.message().c_str());
|
||||
return;
|
||||
}
|
||||
if (auto EC = LocalBQ->getBuffer(Buffer)) {
|
||||
Report("Failed to acquire a buffer; error=%s\n", EC.message().c_str());
|
||||
return;
|
||||
}
|
||||
setupNewBuffer(Buffer);
|
||||
}
|
||||
|
||||
// By this point, we are now ready to write at most 24 bytes (one metadata
|
||||
// record and one function record).
|
||||
BufferStart = static_cast<char *>(Buffer.Buffer);
|
||||
assert((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart >=
|
||||
static_cast<ptrdiff_t>(MetadataRecSize) &&
|
||||
"Misconfigured BufferQueue provided; Buffer size not large enough.");
|
||||
|
||||
std::aligned_storage<sizeof(FunctionRecord), alignof(FunctionRecord)>::type
|
||||
AlignedFuncRecordBuffer;
|
||||
auto &FuncRecord =
|
||||
*reinterpret_cast<FunctionRecord *>(&AlignedFuncRecordBuffer);
|
||||
FuncRecord.Type = uint8_t(RecordType::Function);
|
||||
|
||||
// Only get the lower 28 bits of the function id.
|
||||
FuncRecord.FuncId = FuncId & ~(0x0F << 28);
|
||||
|
||||
// Here we compute the TSC Delta. There are a few interesting situations we
|
||||
// need to account for:
|
||||
//
|
||||
// - The thread has migrated to a different CPU. If this is the case, then
|
||||
// we write down the following records:
|
||||
//
|
||||
// 1. A 'NewCPUId' Metadata record.
|
||||
// 2. A FunctionRecord with a 0 for the TSCDelta field.
|
||||
//
|
||||
// - The TSC delta is greater than the 32 bits we can store in a
|
||||
// FunctionRecord. In this case we write down the following records:
|
||||
//
|
||||
// 1. A 'TSCWrap' Metadata record.
|
||||
// 2. A FunctionRecord with a 0 for the TSCDelta field.
|
||||
//
|
||||
// - The TSC delta is representable within the 32 bits we can store in a
|
||||
// FunctionRecord. In this case we write down just a FunctionRecord with
|
||||
// the correct TSC delta.
|
||||
//
|
||||
FuncRecord.TSCDelta = 0;
|
||||
if (CPU != CurrentCPU) {
|
||||
// We've moved to a new CPU.
|
||||
writeNewCPUIdMetadata(CPU, TSC);
|
||||
} else {
|
||||
// If the delta is greater than the range for a uint32_t, then we write out
|
||||
// the TSC wrap metadata entry with the full TSC, and the TSC for the
|
||||
// function record be 0.
|
||||
auto Delta = LastTSC - TSC;
|
||||
if (Delta > (1ULL << 32) - 1)
|
||||
writeTSCWrapMetadata(TSC);
|
||||
else
|
||||
FuncRecord.TSCDelta = Delta;
|
||||
}
|
||||
|
||||
// We then update our "LastTSC" and "CurrentCPU" thread-local variables to aid
|
||||
// us in future computations of this TSC delta value.
|
||||
LastTSC = TSC;
|
||||
CurrentCPU = CPU;
|
||||
|
||||
switch (Entry) {
|
||||
case XRayEntryType::ENTRY:
|
||||
case XRayEntryType::LOG_ARGS_ENTRY:
|
||||
FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
|
||||
break;
|
||||
case XRayEntryType::EXIT:
|
||||
FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionExit);
|
||||
break;
|
||||
case XRayEntryType::TAIL:
|
||||
FuncRecord.RecordKind =
|
||||
uint8_t(FunctionRecord::RecordKinds::FunctionTailExit);
|
||||
break;
|
||||
}
|
||||
|
||||
std::memcpy(RecordPtr, &AlignedFuncRecordBuffer, sizeof(FunctionRecord));
|
||||
RecordPtr += sizeof(FunctionRecord);
|
||||
|
||||
// If we've exhausted the buffer by this time, we then release the buffer to
|
||||
// make sure that other threads may start using this buffer.
|
||||
if ((RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) {
|
||||
writeEOBMetadata();
|
||||
if (auto EC = LocalBQ->releaseBuffer(Buffer)) {
|
||||
Report("Failed releasing buffer at %p; error=%s\n", Buffer.Buffer,
|
||||
EC.message().c_str());
|
||||
return;
|
||||
}
|
||||
RecordPtr = nullptr;
|
||||
}
|
||||
__xray_fdr_internal::processFunctionHook(FuncId, Entry, TSC, CPU,
|
||||
clock_gettime, LoggingStatus, BQ);
|
||||
}
|
||||
|
||||
} // namespace __xray
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#define XRAY_XRAY_FDR_LOGGING_H
|
||||
|
||||
#include "xray/xray_log_interface.h"
|
||||
#include "xray_fdr_log_records.h"
|
||||
|
||||
// FDR (Flight Data Recorder) Mode
|
||||
// ===============================
|
||||
|
@ -26,57 +27,6 @@
|
|||
|
||||
namespace __xray {
|
||||
|
||||
enum class RecordType : uint8_t {
|
||||
Function, Metadata
|
||||
};
|
||||
|
||||
// A MetadataRecord encodes the kind of record in its first byte, and have 15
|
||||
// additional bytes in the end to hold free-form data.
|
||||
struct alignas(16) MetadataRecord {
|
||||
// A MetadataRecord must always have a type of 1.
|
||||
/* RecordType */ uint8_t Type : 1;
|
||||
|
||||
// Each kind of record is represented as a 7-bit value (even though we use an
|
||||
// unsigned 8-bit enum class to do so).
|
||||
enum class RecordKinds : uint8_t {
|
||||
NewBuffer,
|
||||
EndOfBuffer,
|
||||
NewCPUId,
|
||||
TSCWrap,
|
||||
WalltimeMarker,
|
||||
};
|
||||
// Use 7 bits to identify this record type.
|
||||
/* RecordKinds */ uint8_t RecordKind : 7;
|
||||
char Data[15];
|
||||
} __attribute__((packed));
|
||||
|
||||
static_assert(sizeof(MetadataRecord) == 16, "Wrong size for MetadataRecord.");
|
||||
|
||||
struct alignas(8) FunctionRecord {
|
||||
// A FunctionRecord must always have a type of 0.
|
||||
/* RecordType */ uint8_t Type : 1;
|
||||
enum class RecordKinds {
|
||||
FunctionEnter = 0x00,
|
||||
FunctionExit = 0x01,
|
||||
FunctionTailExit = 0x02,
|
||||
};
|
||||
/* RecordKinds */ uint8_t RecordKind : 3;
|
||||
|
||||
// We only use 28 bits of the function ID, so that we can use as few bytes as
|
||||
// possible. This means we only support 2^28 (268,435,456) unique function ids
|
||||
// in a single binary.
|
||||
int FuncId : 28;
|
||||
|
||||
// We use another 4 bytes to hold the delta between the previous entry's TSC.
|
||||
// In case we've found that the distance is greater than the allowable 32 bits
|
||||
// (either because we are running in a different CPU and the TSC might be
|
||||
// different then), we should use a MetadataRecord before this FunctionRecord
|
||||
// that will contain the full TSC for that CPU, and keep this to 0.
|
||||
uint32_t TSCDelta;
|
||||
} __attribute__((packed));
|
||||
|
||||
static_assert(sizeof(FunctionRecord) == 8, "Wrong size for FunctionRecord.");
|
||||
|
||||
// Options used by the FDR implementation.
|
||||
struct FDRLoggingOptions {
|
||||
bool ReportErrors = false;
|
||||
|
@ -85,7 +35,7 @@ struct FDRLoggingOptions {
|
|||
|
||||
// Flight Data Recorder mode implementation interfaces.
|
||||
XRayLogInitStatus fdrLoggingInit(size_t BufferSize, size_t BufferMax,
|
||||
void *Options, size_t OptionsSize);
|
||||
void *Options, size_t OptionsSize);
|
||||
XRayLogInitStatus fdrLoggingFinalize();
|
||||
void fdrLoggingHandleArg0(int32_t FuncId, XRayEntryType Entry);
|
||||
XRayLogFlushStatus fdrLoggingFlush();
|
||||
|
|
|
@ -0,0 +1,486 @@
|
|||
//===-- xray_fdr_logging_impl.h ---------------------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file is a part of XRay, a dynamic runtime instrumentation system.
|
||||
//
|
||||
// Here we implement the thread local state management and record i/o for Flight
|
||||
// Data Recorder mode for XRay, where we use compact structures to store records
|
||||
// in memory as well as when writing out the data to files.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef XRAY_XRAY_FDR_LOGGING_IMPL_H
|
||||
#define XRAY_XRAY_FDR_LOGGING_IMPL_H
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <sys/syscall.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "sanitizer_common/sanitizer_common.h"
|
||||
#include "xray/xray_log_interface.h"
|
||||
#include "xray_buffer_queue.h"
|
||||
#include "xray_defs.h"
|
||||
#include "xray_fdr_log_records.h"
|
||||
|
||||
namespace __xray {
|
||||
|
||||
/// We expose some of the state transitions when FDR logging mode is operating
|
||||
/// such that we can simulate a series of log events that may occur without
|
||||
/// and test with determinism without worrying about the real CPU time.
|
||||
///
|
||||
/// Because the code uses thread_local allocation extensively as part of its
|
||||
/// design, callers that wish to test events occuring on different threads
|
||||
/// will actually have to run them on different threads.
|
||||
///
|
||||
/// This also means that it is possible to break invariants maintained by
|
||||
/// cooperation with xray_fdr_logging class, so be careful and think twice.
|
||||
namespace __xray_fdr_internal {
|
||||
|
||||
/// Writes the new buffer record and wallclock time that begin a buffer for a
|
||||
/// thread to MemPtr and increments MemPtr. Bypasses the thread local state
|
||||
// machine and writes directly to memory without checks.
|
||||
static void writeNewBufferPreamble(pid_t Tid, timespec TS, char *&MemPtr);
|
||||
|
||||
/// Write a metadata record to switch to a new CPU to MemPtr and increments
|
||||
/// MemPtr. Bypasses the thread local state machine and writes directly to
|
||||
/// memory without checks.
|
||||
static void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC, char *&MemPtr);
|
||||
|
||||
/// Writes an EOB metadata record to MemPtr and increments MemPtr. Bypasses the
|
||||
/// thread local state machine and writes directly to memory without checks.
|
||||
static void writeEOBMetadata(char *&MemPtr);
|
||||
|
||||
/// Writes a TSC Wrap metadata record to MemPtr and increments MemPtr. Bypasses
|
||||
/// the thread local state machine and directly writes to memory without checks.
|
||||
static void writeTSCWrapMetadata(uint64_t TSC, char *&MemPtr);
|
||||
|
||||
/// Writes a Function Record to MemPtr and increments MemPtr. Bypasses the
|
||||
/// thread local state machine and writes the function record directly to
|
||||
/// memory.
|
||||
static void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
|
||||
XRayEntryType EntryType, char *&MemPtr);
|
||||
|
||||
/// Sets up a new buffer in thread_local storage and writes a preamble. The
|
||||
/// wall_clock_reader function is used to populate the WallTimeRecord entry.
|
||||
static void setupNewBuffer(const BufferQueue::Buffer &Buffer,
|
||||
int (*wall_clock_reader)(clockid_t,
|
||||
struct timespec *));
|
||||
|
||||
/// Called to record CPU time for a new CPU within the current thread.
|
||||
static void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC);
|
||||
|
||||
/// Called to close the buffer when the thread exhausts the buffer or when the
|
||||
/// thread exits (via a thread local variable destructor).
|
||||
static void writeEOBMetadata();
|
||||
|
||||
/// TSC Wrap records are written when a TSC delta encoding scheme overflows.
|
||||
static void writeTSCWrapMetadata(uint64_t TSC);
|
||||
|
||||
/// Here's where the meat of the processing happens. The writer captures
|
||||
/// function entry, exit and tail exit points with a time and will create
|
||||
/// TSCWrap, NewCPUId and Function records as necessary. The writer might
|
||||
/// walk backward through its buffer and erase trivial functions to avoid
|
||||
/// polluting the log and may use the buffer queue to obtain or release a
|
||||
/// buffer.
|
||||
static void
|
||||
processFunctionHook(int32_t FuncId, XRayEntryType Entry, uint64_t TSC,
|
||||
unsigned char CPU,
|
||||
int (*wall_clock_reader)(clockid_t, struct timespec *),
|
||||
const std::atomic<XRayLogInitStatus> &LoggingStatus,
|
||||
const std::shared_ptr<BufferQueue> &BQ);
|
||||
|
||||
//-----------------------------------------------------------------------------|
|
||||
// The rest of the file is implementation. |
|
||||
//-----------------------------------------------------------------------------|
|
||||
// Functions are implemented in the header for inlining since we don't want |
|
||||
// to grow the stack when we've hijacked the binary for logging. |
|
||||
//-----------------------------------------------------------------------------|
|
||||
|
||||
namespace {
|
||||
thread_local BufferQueue::Buffer Buffer;
|
||||
thread_local char *RecordPtr = nullptr;
|
||||
|
||||
constexpr auto MetadataRecSize = sizeof(MetadataRecord);
|
||||
constexpr auto FunctionRecSize = sizeof(FunctionRecord);
|
||||
|
||||
class ThreadExitBufferCleanup {
|
||||
std::weak_ptr<BufferQueue> Buffers;
|
||||
BufferQueue::Buffer &Buffer;
|
||||
|
||||
public:
|
||||
explicit ThreadExitBufferCleanup(std::weak_ptr<BufferQueue> BQ,
|
||||
BufferQueue::Buffer &Buffer)
|
||||
XRAY_NEVER_INSTRUMENT : Buffers(BQ),
|
||||
Buffer(Buffer) {}
|
||||
|
||||
~ThreadExitBufferCleanup() noexcept XRAY_NEVER_INSTRUMENT {
|
||||
if (RecordPtr == nullptr)
|
||||
return;
|
||||
|
||||
// We make sure that upon exit, a thread will write out the EOB
|
||||
// MetadataRecord in the thread-local log, and also release the buffer to
|
||||
// the queue.
|
||||
assert((RecordPtr + MetadataRecSize) - static_cast<char *>(Buffer.Buffer) >=
|
||||
static_cast<ptrdiff_t>(MetadataRecSize));
|
||||
if (auto BQ = Buffers.lock()) {
|
||||
writeEOBMetadata();
|
||||
if (auto EC = BQ->releaseBuffer(Buffer))
|
||||
Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
|
||||
EC.message().c_str());
|
||||
return;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class RecursionGuard {
|
||||
bool &Running;
|
||||
const bool Valid;
|
||||
|
||||
public:
|
||||
explicit RecursionGuard(bool &R) : Running(R), Valid(!R) {
|
||||
if (Valid)
|
||||
Running = true;
|
||||
}
|
||||
|
||||
RecursionGuard(const RecursionGuard &) = delete;
|
||||
RecursionGuard(RecursionGuard &&) = delete;
|
||||
RecursionGuard &operator=(const RecursionGuard &) = delete;
|
||||
RecursionGuard &operator=(RecursionGuard &&) = delete;
|
||||
|
||||
explicit operator bool() const { return Valid; }
|
||||
|
||||
~RecursionGuard() noexcept {
|
||||
if (Valid)
|
||||
Running = false;
|
||||
}
|
||||
};
|
||||
|
||||
static inline bool loggingInitialized(
|
||||
const std::atomic<XRayLogInitStatus> &LoggingStatus) XRAY_NEVER_INSTRUMENT {
|
||||
return LoggingStatus.load(std::memory_order_acquire) ==
|
||||
XRayLogInitStatus::XRAY_LOG_INITIALIZED;
|
||||
}
|
||||
|
||||
} // namespace anonymous
|
||||
|
||||
static inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
|
||||
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
||||
static constexpr int InitRecordsCount = 2;
|
||||
std::aligned_storage<sizeof(MetadataRecord)>::type Records[InitRecordsCount];
|
||||
{
|
||||
// Write out a MetadataRecord to signify that this is the start of a new
|
||||
// buffer, associated with a particular thread, with a new CPU. For the
|
||||
// data, we have 15 bytes to squeeze as much information as we can. At this
|
||||
// point we only write down the following bytes:
|
||||
// - Thread ID (pid_t, 4 bytes)
|
||||
auto &NewBuffer = *reinterpret_cast<MetadataRecord *>(&Records[0]);
|
||||
NewBuffer.Type = uint8_t(RecordType::Metadata);
|
||||
NewBuffer.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewBuffer);
|
||||
std::memcpy(&NewBuffer.Data, &Tid, sizeof(pid_t));
|
||||
}
|
||||
// Also write the WalltimeMarker record.
|
||||
{
|
||||
static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes");
|
||||
auto &WalltimeMarker = *reinterpret_cast<MetadataRecord *>(&Records[1]);
|
||||
WalltimeMarker.Type = uint8_t(RecordType::Metadata);
|
||||
WalltimeMarker.RecordKind =
|
||||
uint8_t(MetadataRecord::RecordKinds::WalltimeMarker);
|
||||
|
||||
// We only really need microsecond precision here, and enforce across
|
||||
// platforms that we need 64-bit seconds and 32-bit microseconds encoded in
|
||||
// the Metadata record.
|
||||
int32_t Micros = TS.tv_nsec / 1000;
|
||||
int64_t Seconds = TS.tv_sec;
|
||||
std::memcpy(WalltimeMarker.Data, &Seconds, sizeof(Seconds));
|
||||
std::memcpy(WalltimeMarker.Data + sizeof(Seconds), &Micros, sizeof(Micros));
|
||||
}
|
||||
std::memcpy(MemPtr, Records, sizeof(MetadataRecord) * InitRecordsCount);
|
||||
MemPtr += sizeof(MetadataRecord) * InitRecordsCount;
|
||||
}
|
||||
|
||||
static inline void setupNewBuffer(const BufferQueue::Buffer &Buffer,
|
||||
int (*wall_clock_reader)(clockid_t,
|
||||
struct timespec *))
|
||||
XRAY_NEVER_INSTRUMENT {
|
||||
RecordPtr = static_cast<char *>(Buffer.Buffer);
|
||||
pid_t Tid = syscall(SYS_gettid);
|
||||
timespec TS{0, 0};
|
||||
// This is typically clock_gettime, but callers have injection ability.
|
||||
wall_clock_reader(CLOCK_MONOTONIC, &TS);
|
||||
writeNewBufferPreamble(Tid, TS, RecordPtr);
|
||||
}
|
||||
|
||||
static inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
|
||||
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
||||
MetadataRecord NewCPUId;
|
||||
NewCPUId.Type = uint8_t(RecordType::Metadata);
|
||||
NewCPUId.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewCPUId);
|
||||
|
||||
// The data for the New CPU will contain the following bytes:
|
||||
// - CPU ID (uint16_t, 2 bytes)
|
||||
// - Full TSC (uint64_t, 8 bytes)
|
||||
// Total = 12 bytes.
|
||||
std::memcpy(&NewCPUId.Data, &CPU, sizeof(CPU));
|
||||
std::memcpy(&NewCPUId.Data[sizeof(CPU)], &TSC, sizeof(TSC));
|
||||
std::memcpy(MemPtr, &NewCPUId, sizeof(MetadataRecord));
|
||||
MemPtr += sizeof(MetadataRecord);
|
||||
}
|
||||
|
||||
static inline void writeNewCPUIdMetadata(uint16_t CPU,
|
||||
uint64_t TSC) XRAY_NEVER_INSTRUMENT {
|
||||
writeNewCPUIdMetadata(CPU, TSC, RecordPtr);
|
||||
}
|
||||
|
||||
static inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
||||
MetadataRecord EOBMeta;
|
||||
EOBMeta.Type = uint8_t(RecordType::Metadata);
|
||||
EOBMeta.RecordKind = uint8_t(MetadataRecord::RecordKinds::EndOfBuffer);
|
||||
// For now we don't write any bytes into the Data field.
|
||||
std::memcpy(MemPtr, &EOBMeta, sizeof(MetadataRecord));
|
||||
MemPtr += sizeof(MetadataRecord);
|
||||
}
|
||||
|
||||
static inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {
|
||||
writeEOBMetadata(RecordPtr);
|
||||
}
|
||||
|
||||
static inline void writeTSCWrapMetadata(uint64_t TSC,
|
||||
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
||||
MetadataRecord TSCWrap;
|
||||
TSCWrap.Type = uint8_t(RecordType::Metadata);
|
||||
TSCWrap.RecordKind = uint8_t(MetadataRecord::RecordKinds::TSCWrap);
|
||||
|
||||
// The data for the TSCWrap record contains the following bytes:
|
||||
// - Full TSC (uint64_t, 8 bytes)
|
||||
// Total = 8 bytes.
|
||||
std::memcpy(&TSCWrap.Data, &TSC, sizeof(TSC));
|
||||
std::memcpy(MemPtr, &TSCWrap, sizeof(MetadataRecord));
|
||||
MemPtr += sizeof(MetadataRecord);
|
||||
}
|
||||
|
||||
static inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
|
||||
writeTSCWrapMetadata(TSC, RecordPtr);
|
||||
}
|
||||
|
||||
static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
|
||||
XRayEntryType EntryType,
|
||||
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
||||
std::aligned_storage<sizeof(FunctionRecord), alignof(FunctionRecord)>::type
|
||||
AlignedFuncRecordBuffer;
|
||||
auto &FuncRecord =
|
||||
*reinterpret_cast<FunctionRecord *>(&AlignedFuncRecordBuffer);
|
||||
FuncRecord.Type = uint8_t(RecordType::Function);
|
||||
// Only take 28 bits of the function id.
|
||||
FuncRecord.FuncId = FuncId & ~(0x0F << 28);
|
||||
FuncRecord.TSCDelta = TSCDelta;
|
||||
|
||||
switch (EntryType) {
|
||||
case XRayEntryType::ENTRY:
|
||||
case XRayEntryType::LOG_ARGS_ENTRY:
|
||||
FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
|
||||
break;
|
||||
case XRayEntryType::EXIT:
|
||||
FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionExit);
|
||||
break;
|
||||
case XRayEntryType::TAIL:
|
||||
FuncRecord.RecordKind =
|
||||
uint8_t(FunctionRecord::RecordKinds::FunctionTailExit);
|
||||
break;
|
||||
}
|
||||
|
||||
std::memcpy(MemPtr, &AlignedFuncRecordBuffer, sizeof(FunctionRecord));
|
||||
MemPtr += sizeof(FunctionRecord);
|
||||
}
|
||||
|
||||
static inline void processFunctionHook(
|
||||
int32_t FuncId, XRayEntryType Entry, uint64_t TSC, unsigned char CPU,
|
||||
int (*wall_clock_reader)(clockid_t, struct timespec *),
|
||||
const std::atomic<XRayLogInitStatus> &LoggingStatus,
|
||||
const std::shared_ptr<BufferQueue> &BQ) XRAY_NEVER_INSTRUMENT {
|
||||
// Bail out right away if logging is not initialized yet.
|
||||
if (LoggingStatus.load(std::memory_order_acquire) !=
|
||||
XRayLogInitStatus::XRAY_LOG_INITIALIZED)
|
||||
return;
|
||||
|
||||
// We use a thread_local variable to keep track of which CPUs we've already
|
||||
// run, and the TSC times for these CPUs. This allows us to stop repeating the
|
||||
// CPU field in the function records.
|
||||
//
|
||||
// We assume that we'll support only 65536 CPUs for x86_64.
|
||||
thread_local uint16_t CurrentCPU = std::numeric_limits<uint16_t>::max();
|
||||
thread_local uint64_t LastTSC = 0;
|
||||
|
||||
// Make sure a thread that's ever called handleArg0 has a thread-local
|
||||
// live reference to the buffer queue for this particular instance of
|
||||
// FDRLogging, and that we're going to clean it up when the thread exits.
|
||||
thread_local auto LocalBQ = BQ;
|
||||
thread_local ThreadExitBufferCleanup Cleanup(LocalBQ, Buffer);
|
||||
|
||||
// Prevent signal handler recursion, so in case we're already in a log writing
|
||||
// mode and the signal handler comes in (and is also instrumented) then we
|
||||
// don't want to be clobbering potentially partial writes already happening in
|
||||
// the thread. We use a simple thread_local latch to only allow one on-going
|
||||
// handleArg0 to happen at any given time.
|
||||
thread_local bool Running = false;
|
||||
RecursionGuard Guard{Running};
|
||||
if (!Guard) {
|
||||
assert(Running == true && "RecursionGuard is buggy!");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!loggingInitialized(LoggingStatus) || LocalBQ->finalizing()) {
|
||||
writeEOBMetadata();
|
||||
if (auto EC = BQ->releaseBuffer(Buffer)) {
|
||||
Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
|
||||
EC.message().c_str());
|
||||
return;
|
||||
}
|
||||
RecordPtr = nullptr;
|
||||
}
|
||||
|
||||
if (Buffer.Buffer == nullptr) {
|
||||
if (auto EC = LocalBQ->getBuffer(Buffer)) {
|
||||
auto LS = LoggingStatus.load(std::memory_order_acquire);
|
||||
if (LS != XRayLogInitStatus::XRAY_LOG_FINALIZING &&
|
||||
LS != XRayLogInitStatus::XRAY_LOG_FINALIZED)
|
||||
Report("Failed to acquire a buffer; error=%s\n", EC.message().c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
setupNewBuffer(Buffer, wall_clock_reader);
|
||||
}
|
||||
|
||||
if (CurrentCPU == std::numeric_limits<uint16_t>::max()) {
|
||||
// This means this is the first CPU this thread has ever run on. We set the
|
||||
// current CPU and record this as the first TSC we've seen.
|
||||
CurrentCPU = CPU;
|
||||
writeNewCPUIdMetadata(CPU, TSC);
|
||||
}
|
||||
|
||||
// Before we go setting up writing new function entries, we need to be really
|
||||
// careful about the pointer math we're doing. This means we need to ensure
|
||||
// that the record we are about to write is going to fit into the buffer,
|
||||
// without overflowing the buffer.
|
||||
//
|
||||
// To do this properly, we use the following assumptions:
|
||||
//
|
||||
// - The least number of bytes we will ever write is 8
|
||||
// (sizeof(FunctionRecord)) only if the delta between the previous entry
|
||||
// and this entry is within 32 bits.
|
||||
// - The most number of bytes we will ever write is 8 + 16 = 24. This is
|
||||
// computed by:
|
||||
//
|
||||
// sizeof(FunctionRecord) + sizeof(MetadataRecord)
|
||||
//
|
||||
// These arise in the following cases:
|
||||
//
|
||||
// 1. When the delta between the TSC we get and the previous TSC for the
|
||||
// same CPU is outside of the uint32_t range, we end up having to
|
||||
// write a MetadataRecord to indicate a "tsc wrap" before the actual
|
||||
// FunctionRecord.
|
||||
// 2. When we learn that we've moved CPUs, we need to write a
|
||||
// MetadataRecord to indicate a "cpu change", and thus write out the
|
||||
// current TSC for that CPU before writing out the actual
|
||||
// FunctionRecord.
|
||||
// 3. When we learn about a new CPU ID, we need to write down a "new cpu
|
||||
// id" MetadataRecord before writing out the actual FunctionRecord.
|
||||
//
|
||||
// - An End-of-Buffer (EOB) MetadataRecord is 16 bytes.
|
||||
//
|
||||
// So the math we need to do is to determine whether writing 24 bytes past the
|
||||
// current pointer leaves us with enough bytes to write the EOB
|
||||
// MetadataRecord. If we don't have enough space after writing as much as 24
|
||||
// bytes in the end of the buffer, we need to write out the EOB, get a new
|
||||
// Buffer, set it up properly before doing any further writing.
|
||||
//
|
||||
char *BufferStart = static_cast<char *>(Buffer.Buffer);
|
||||
if ((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart <
|
||||
static_cast<ptrdiff_t>(MetadataRecSize)) {
|
||||
writeEOBMetadata();
|
||||
if (auto EC = LocalBQ->releaseBuffer(Buffer)) {
|
||||
Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
|
||||
EC.message().c_str());
|
||||
return;
|
||||
}
|
||||
if (auto EC = LocalBQ->getBuffer(Buffer)) {
|
||||
Report("Failed to acquire a buffer; error=%s\n", EC.message().c_str());
|
||||
return;
|
||||
}
|
||||
setupNewBuffer(Buffer, wall_clock_reader);
|
||||
}
|
||||
|
||||
// By this point, we are now ready to write at most 24 bytes (one metadata
|
||||
// record and one function record).
|
||||
BufferStart = static_cast<char *>(Buffer.Buffer);
|
||||
assert((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart >=
|
||||
static_cast<ptrdiff_t>(MetadataRecSize) &&
|
||||
"Misconfigured BufferQueue provided; Buffer size not large enough.");
|
||||
|
||||
// Here we compute the TSC Delta. There are a few interesting situations we
|
||||
// need to account for:
|
||||
//
|
||||
// - The thread has migrated to a different CPU. If this is the case, then
|
||||
// we write down the following records:
|
||||
//
|
||||
// 1. A 'NewCPUId' Metadata record.
|
||||
// 2. A FunctionRecord with a 0 for the TSCDelta field.
|
||||
//
|
||||
// - The TSC delta is greater than the 32 bits we can store in a
|
||||
// FunctionRecord. In this case we write down the following records:
|
||||
//
|
||||
// 1. A 'TSCWrap' Metadata record.
|
||||
// 2. A FunctionRecord with a 0 for the TSCDelta field.
|
||||
//
|
||||
// - The TSC delta is representable within the 32 bits we can store in a
|
||||
// FunctionRecord. In this case we write down just a FunctionRecord with
|
||||
// the correct TSC delta.
|
||||
//
|
||||
|
||||
uint32_t RecordTSCDelta = 0;
|
||||
if (CPU != CurrentCPU) {
|
||||
// We've moved to a new CPU.
|
||||
writeNewCPUIdMetadata(CPU, TSC);
|
||||
} else {
|
||||
// If the delta is greater than the range for a uint32_t, then we write out
|
||||
// the TSC wrap metadata entry with the full TSC, and the TSC for the
|
||||
// function record be 0.
|
||||
auto Delta = TSC - LastTSC;
|
||||
if (Delta > (1ULL << 32) - 1)
|
||||
writeTSCWrapMetadata(TSC);
|
||||
else
|
||||
RecordTSCDelta = Delta;
|
||||
}
|
||||
|
||||
// We then update our "LastTSC" and "CurrentCPU" thread-local variables to aid
|
||||
// us in future computations of this TSC delta value.
|
||||
LastTSC = TSC;
|
||||
CurrentCPU = CPU;
|
||||
|
||||
writeFunctionRecord(FuncId, RecordTSCDelta, Entry, RecordPtr);
|
||||
|
||||
// If we've exhausted the buffer by this time, we then release the buffer to
|
||||
// make sure that other threads may start using this buffer.
|
||||
if ((RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) {
|
||||
writeEOBMetadata();
|
||||
if (auto EC = LocalBQ->releaseBuffer(Buffer)) {
|
||||
Report("Failed releasing buffer at %p; error=%s\n", Buffer.Buffer,
|
||||
EC.message().c_str());
|
||||
return;
|
||||
}
|
||||
RecordPtr = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace __xray_fdr_internal
|
||||
|
||||
} // namespace __xray
|
||||
|
||||
#endif // XRAY_XRAY_FDR_LOGGING_IMPL_H
|
Loading…
Reference in New Issue