[XRay] Minimal tool to convert xray traces to Chrome's Trace Event Format.

Minimal tool to convert xray traces to Chrome's Trace Event Format.

Summary:
Make use of the Chrome Trace Event format's Duration events and stack frame
dict to produce JSON files that chrome://tracing can visualize from XRay
function call traces. The Trace Event format is more robust and has several
features like argument logging, function categorization, multi process traces,
etc. that we can add as needed. Duration events cover an important base case.

Part of this change is rearranging the code so that the TrieNode data structure
can be used from multiple tools and can carry parameterized baggage on the
nodes. I put the actual behavior changes in llvm-xray convert exclusively.

Exploring the trace of instrumented llc was pretty nifty, if overwhelming.
I can envision this being very useful for analyzing contention scenarios or
tuning parameters like batch sizes in a producer consumer queue. For more
targeted traces like this, let's talk about how we want to approach trace
pruning.

Reviewers: dberris, pelikan

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D39362

llvm-svn: 317531
This commit is contained in:
Keith Wyss 2017-11-07 00:28:28 +00:00
parent 989b1fcb01
commit 424279958d
4 changed files with 374 additions and 112 deletions

View File

@ -0,0 +1,92 @@
//===- trie-node.h - XRay Call Stack Data Structure -----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides a data structure and routines for working with call stacks
// of instrumented functions.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_XRAY_STACK_TRIE_H
#define LLVM_TOOLS_LLVM_XRAY_STACK_TRIE_H
#include <forward_list>
#include <numeric>
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
/// One node in a trie of function invocations. Linking these nodes together
/// while reading an XRay trace places every function call in the context of
/// the call stack that led to it.
///
/// \tparam AssociatedData payload type chosen by the user of the template and
/// stored on every node.
///
/// The member order is part of the interface: nodes are created with aggregate
/// initialisation (FuncId, Parent, Callees, ExtraData).
template <typename AssociatedData> struct TrieNode {
  /// Id of the function this node stands for.
  int32_t FuncId;

  /// Node of the calling function; null for the root of a stack.
  TrieNode *Parent;

  /// Nodes of the functions called from this one.
  llvm::SmallVector<TrieNode *, 4> Callees;

  /// User-supplied payload attached to this node.
  AssociatedData ExtraData;
};
/// Builds a new node that combines two TrieNodes carrying the same function
/// id. The payloads are combined with \p MergeCallable and the callee lists
/// are unioned: callees present on both sides (matched by function id) are
/// merged recursively, callees present on one side only are reused as-is.
///
/// \param Left, Right   nodes to combine; they must share a FuncId.
/// \param NewParent     parent recorded on the new node (may be nullptr).
/// \param NodeStore     storage that owns every node created by the merge.
/// \param MergeCallable callable taking (const T &, const T &) and returning
///                      the combined T.
/// \returns a pointer to the merged node, which lives in \p NodeStore.
///
/// Note that callees reused from only one side are shared with the input trie
/// and keep the Parent pointer they already had.
template <typename T, typename Callable>
TrieNode<T> *
mergeTrieNodes(const TrieNode<T> &Left, const TrieNode<T> &Right,
               /*Non-deduced pointer type for nullptr compatibility*/
               typename std::remove_reference<TrieNode<T> *>::type NewParent,
               std::forward_list<TrieNode<T>> &NodeStore,
               Callable &&MergeCallable) {
  llvm::function_ref<T(const T &, const T &)> Combine(
      std::forward<Callable>(MergeCallable));
  assert(Left.FuncId == Right.FuncId);

  // Allocate the merged node up front so that recursively merged callees can
  // point back at it as their parent.
  NodeStore.push_front(TrieNode<T>{
      Left.FuncId, NewParent, {}, Combine(Left.ExtraData, Right.ExtraData)});
  TrieNode<T> *Merged = &NodeStore.front();

  // Index the left callees by function id; entries are removed as they are
  // paired up with a right callee.
  llvm::DenseMap<int32_t, TrieNode<T> *> UnmatchedLeft;
  for (TrieNode<T> *LeftCallee : Left.Callees)
    UnmatchedLeft[LeftCallee->FuncId] = LeftCallee;

  // Walk the right callees: pair each with its left counterpart when there is
  // one, otherwise adopt it unchanged.
  // TODO: Unroll into iterative and explicit stack for efficiency.
  for (TrieNode<T> *RightCallee : Right.Callees) {
    auto Match = UnmatchedLeft.find(RightCallee->FuncId);
    if (Match == UnmatchedLeft.end()) {
      Merged->Callees.push_back(RightCallee);
      continue;
    }
    Merged->Callees.push_back(mergeTrieNodes(*(Match->second), *RightCallee,
                                             Merged, NodeStore, Combine));
    UnmatchedLeft.erase(Match);
  }

  // Whatever is still in the map only existed on the left side.
  for (auto Leftover : UnmatchedLeft)
    Merged->Callees.push_back(Leftover.second);

  return Merged;
}
#endif // LLVM_TOOLS_LLVM_XRAY_STACK_TRIE_H

View File

@ -12,10 +12,12 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "xray-converter.h" #include "xray-converter.h"
#include "trie-node.h"
#include "xray-registry.h" #include "xray-registry.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/Support/EndianStream.h" #include "llvm/Support/EndianStream.h"
#include "llvm/Support/FileSystem.h" #include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/YAMLTraits.h" #include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h" #include "llvm/Support/raw_ostream.h"
@ -32,11 +34,14 @@ static cl::SubCommand Convert("convert", "Trace Format Conversion");
static cl::opt<std::string> ConvertInput(cl::Positional, static cl::opt<std::string> ConvertInput(cl::Positional,
cl::desc("<xray log file>"), cl::desc("<xray log file>"),
cl::Required, cl::sub(Convert)); cl::Required, cl::sub(Convert));
enum class ConvertFormats { BINARY, YAML }; enum class ConvertFormats { BINARY, YAML, CHROME_TRACE_EVENT };
static cl::opt<ConvertFormats> ConvertOutputFormat( static cl::opt<ConvertFormats> ConvertOutputFormat(
"output-format", cl::desc("output format"), "output-format", cl::desc("output format"),
cl::values(clEnumValN(ConvertFormats::BINARY, "raw", "output in binary"), cl::values(clEnumValN(ConvertFormats::BINARY, "raw", "output in binary"),
clEnumValN(ConvertFormats::YAML, "yaml", "output in yaml")), clEnumValN(ConvertFormats::YAML, "yaml", "output in yaml"),
clEnumValN(ConvertFormats::CHROME_TRACE_EVENT, "trace_event",
"Output in chrome's trace event format. "
"May be visualized with the Catapult trace viewer.")),
cl::sub(Convert)); cl::sub(Convert));
static cl::alias ConvertOutputFormat2("f", cl::aliasopt(ConvertOutputFormat), static cl::alias ConvertOutputFormat2("f", cl::aliasopt(ConvertOutputFormat),
cl::desc("Alias for -output-format"), cl::desc("Alias for -output-format"),
@ -142,6 +147,192 @@ void TraceConverter::exportAsRAWv1(const Trace &Records, raw_ostream &OS) {
} }
} }
namespace {
// Per-node payload used to build the dictionary of stack ids required by the
// Chrome trace event format.
struct StackIdData {
  // Identifier of the call stack that ends at this node. Nodes that describe
  // the same stack in different threads carry the same id.
  unsigned id;

  // Links to the nodes for this same stack in other threads' tries. They are
  // consulted while traversing to tell whether a thread has reached a stack
  // that no thread has seen yet, in which case a fresh unique id is assigned.
  SmallVector<TrieNode<StackIdData> *, 4> siblings;
};

using StackTrieNode = TrieNode<StackIdData>;
// Finds the nodes in other threads' tries that describe the same call stack as
// a call to FnId made from `parent`. Relies on the invariant that each time a
// new node is traversed in a thread, bidirectional sibling pointers are
// maintained.
//
// parent               - node of the caller, or nullptr when FnId is called at
//                        the root of the thread's stack.
// FnId                 - id of the function being entered.
// TId                  - id of the thread entering it; only consulted for
//                        root-level calls, where that thread's own roots are
//                        skipped.
// StackRootsByThreadId - root nodes recorded so far, keyed by thread id.
//
// Returns the matching nodes, which is empty when no other thread has recorded
// this stack.
SmallVector<StackTrieNode *, 4>
findSiblings(StackTrieNode *parent, int32_t FnId, uint32_t TId,
             const DenseMap<uint32_t, SmallVector<StackTrieNode *, 4>>
                 &StackRootsByThreadId) {
  SmallVector<StackTrieNode *, 4> Siblings{};

  if (parent == nullptr) {
    // Root-level call: candidates are the roots of every other thread.
    // Iterate by const reference; iterating by value would copy each thread's
    // whole root vector on every call.
    for (const auto &ThreadRoots : StackRootsByThreadId) {
      // Only look for siblings in other threads.
      if (ThreadRoots.first == TId)
        continue;
      for (StackTrieNode *Root : ThreadRoots.second)
        if (Root->FuncId == FnId)
          Siblings.push_back(Root);
    }
    return Siblings;
  }

  // Nested call: candidates are the callees of the parent's own siblings.
  for (auto *ParentSibling : parent->ExtraData.siblings)
    for (StackTrieNode *Callee : ParentSibling->Callees)
      if (Callee->FuncId == FnId)
        Siblings.push_back(Callee);

  return Siblings;
}
// Returns the trie node for FuncId being called from Parent in thread TId,
// creating the node when the thread has not made this call from this stack
// before.
//
// A newly created node takes the stack id of its siblings (the same stack in
// other threads) when there are any and is linked to them in both directions.
// Otherwise it receives the next value of *id_counter and is registered in
// StacksByStackId. New nodes are stored in NodeStore and appended to the
// parent's callees, or to the thread's roots when Parent is nullptr.
StackTrieNode *findOrCreateStackNode(
    StackTrieNode *Parent, int32_t FuncId, uint32_t TId,
    DenseMap<uint32_t, SmallVector<StackTrieNode *, 4>> &StackRootsByThreadId,
    DenseMap<unsigned, StackTrieNode *> &StacksByStackId, unsigned *id_counter,
    std::forward_list<StackTrieNode> &NodeStore) {
  // The list this node belongs in: the caller's callees, or the thread's
  // roots for a top-level call.
  SmallVector<StackTrieNode *, 4> &Children =
      Parent == nullptr ? StackRootsByThreadId[TId] : Parent->Callees;

  // Reuse the node if this thread has already recorded the call.
  for (StackTrieNode *Child : Children)
    if (Child->FuncId == FuncId)
      return Child;

  SmallVector<StackTrieNode *, 4> Peers =
      findSiblings(Parent, FuncId, TId, StackRootsByThreadId);

  // A stack no other thread has seen gets a fresh id; otherwise share the id
  // already assigned to the siblings.
  const bool IsNewStack = Peers.empty();
  const unsigned StackId =
      IsNewStack ? (*id_counter)++ : Peers[0]->ExtraData.id;

  NodeStore.push_front({FuncId, Parent, {}, {StackId, std::move(Peers)}});
  StackTrieNode *Created = &NodeStore.front();

  if (IsNewStack) {
    StacksByStackId[StackId] = Created;
  } else {
    // Keep the sibling links bidirectional.
    for (auto *Peer : Created->ExtraData.siblings)
      Peer->ExtraData.siblings.push_back(Created);
  }

  Children.push_back(Created);
  return Created;
}
// Writes one trace event to OS as a JSON object with the fields name, ph, tid,
// pid (always "1"), ts and sf. No separator or newline is written; the caller
// is responsible for those.
//
// FuncId            - function the event is about.
// TId               - thread the event occurred on.
// Symbolize         - when true the name comes from
//                     FuncIdHelper.SymbolOrNumber, otherwise the numeric id is
//                     printed.
// EventTimestampUs  - event time, printed with three decimals.
// StackCursor       - node whose stack id is written as the "sf" field.
// FunctionPhenotype - value of the "ph" field (callers pass "B" or "E").
//
// NOTE(review): the name is inserted verbatim; presumably symbol names never
// contain characters that need JSON escaping - confirm.
void writeTraceViewerRecord(raw_ostream &OS, int32_t FuncId, uint32_t TId,
                            bool Symbolize,
                            const FuncIdConversionHelper &FuncIdHelper,
                            double EventTimestampUs,
                            const StackTrieNode &StackCursor,
                            StringRef FunctionPhenotype) {
  OS << " ";

  std::string DisplayName = Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId)
                                      : llvm::to_string(FuncId);
  const unsigned StackFrameId = StackCursor.ExtraData.id;

  OS << llvm::formatv(
      R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "1", )"
      R"("ts" : "{3:f3}", "sf" : "{4}" })",
      DisplayName, FunctionPhenotype, TId, EventTimestampUs, StackFrameId);
}
} // namespace
void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
raw_ostream &OS) {
const auto &FH = Records.getFileHeader();
auto CycleFreq = FH.CycleFrequency;
unsigned id_counter = 0;
OS << "{\n \"traceEvents\": [";
DenseMap<uint32_t, StackTrieNode *> StackCursorByThreadId{};
DenseMap<uint32_t, SmallVector<StackTrieNode *, 4>> StackRootsByThreadId{};
DenseMap<unsigned, StackTrieNode *> StacksByStackId{};
std::forward_list<StackTrieNode> NodeStore{};
int loop_count = 0;
for (const auto &R : Records) {
if (loop_count++ == 0)
OS << "\n";
else
OS << ",\n";
// Chrome trace event format always wants data in micros.
// CyclesPerMicro = CycleHertz / 10^6
// TSC / CyclesPerMicro == TSC * 10^6 / CycleHertz == MicroTimestamp
// Could lose some precision here by converting the TSC to a double to
// multiply by the period in micros. 52 bit mantissa is a good start though.
// TODO: Make feature request to Chrome Trace viewer to accept ticks and a
// frequency or do some more involved calculation to avoid dangers of
// conversion.
double EventTimestampUs = double(1000000) / CycleFreq * double(R.TSC);
StackTrieNode *&StackCursor = StackCursorByThreadId[R.TId];
switch (R.Type) {
case RecordTypes::ENTER:
case RecordTypes::ENTER_ARG:
StackCursor = findOrCreateStackNode(StackCursor, R.FuncId, R.TId,
StackRootsByThreadId, StacksByStackId,
&id_counter, NodeStore);
// Each record is represented as a json dictionary with function name,
// type of B for begin or E for end, thread id, process id (faked),
// timestamp in microseconds, and a stack frame id. The ids are logged
// in an id dictionary after the events.
writeTraceViewerRecord(OS, R.FuncId, R.TId, Symbolize, FuncIdHelper,
EventTimestampUs, *StackCursor, "B");
break;
case RecordTypes::EXIT:
case RecordTypes::TAIL_EXIT:
// No entries to record end for.
if (StackCursor == nullptr)
break;
// Should we emit an END record anyway or account this condition?
// (And/Or in loop termination below)
StackTrieNode *PreviousCursor = nullptr;
do {
writeTraceViewerRecord(OS, StackCursor->FuncId, R.TId, Symbolize,
FuncIdHelper, EventTimestampUs, *StackCursor,
"E");
PreviousCursor = StackCursor;
StackCursor = StackCursor->Parent;
} while (PreviousCursor->FuncId != R.FuncId && StackCursor != nullptr);
break;
}
}
OS << "\n ],\n"; // Close the Trace Events array.
OS << " "
<< "\"displayTimeUnit\": \"ns\",\n";
// The stackFrames dictionary substantially reduces size of the output file by
// avoiding repeating the entire call stack of function names for each entry.
OS << R"( "stackFrames": {)";
int stack_frame_count = 0;
for (auto map_iter : StacksByStackId) {
if (stack_frame_count++ == 0)
OS << "\n";
else
OS << ",\n";
OS << " ";
OS << llvm::formatv(
R"("{0}" : { "name" : "{1}")", map_iter.first,
(Symbolize ? FuncIdHelper.SymbolOrNumber(map_iter.second->FuncId)
: llvm::to_string(map_iter.second->FuncId)));
if (map_iter.second->Parent != nullptr)
OS << llvm::formatv(R"(, "parent": "{0}")",
map_iter.second->Parent->ExtraData.id);
OS << " }";
}
OS << "\n }\n"; // Close the stack frames map.
OS << "}\n"; // Close the JSON entry.
}
namespace llvm { namespace llvm {
namespace xray { namespace xray {
@ -191,6 +382,9 @@ static CommandRegistration Unused(&Convert, []() -> Error {
case ConvertFormats::BINARY: case ConvertFormats::BINARY:
TC.exportAsRAWv1(T, OS); TC.exportAsRAWv1(T, OS);
break; break;
case ConvertFormats::CHROME_TRACE_EVENT:
TC.exportAsChromeTraceEventFormat(T, OS);
break;
} }
return Error::success(); return Error::success();
}); });

View File

@ -15,8 +15,8 @@
#define LLVM_TOOLS_LLVM_XRAY_XRAY_CONVERTER_H #define LLVM_TOOLS_LLVM_XRAY_XRAY_CONVERTER_H
#include "func-id-helper.h" #include "func-id-helper.h"
#include "llvm/XRay/XRayRecord.h"
#include "llvm/XRay/Trace.h" #include "llvm/XRay/Trace.h"
#include "llvm/XRay/XRayRecord.h"
namespace llvm { namespace llvm {
namespace xray { namespace xray {
@ -31,6 +31,11 @@ public:
void exportAsYAML(const Trace &Records, raw_ostream &OS); void exportAsYAML(const Trace &Records, raw_ostream &OS);
void exportAsRAWv1(const Trace &Records, raw_ostream &OS); void exportAsRAWv1(const Trace &Records, raw_ostream &OS);
/// For this conversion, the Function records within each thread are expected
/// to be in sorted TSC order. The trace event format encodes stack traces, so
/// the linear history is essential for correct output.
void exportAsChromeTraceEventFormat(const Trace &Records, raw_ostream &OS);
}; };
} // namespace xray } // namespace xray

View File

@ -19,6 +19,7 @@
#include <numeric> #include <numeric>
#include "func-id-helper.h" #include "func-id-helper.h"
#include "trie-node.h"
#include "xray-registry.h" #include "xray-registry.h"
#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h" #include "llvm/Support/CommandLine.h"
@ -255,96 +256,61 @@ private:
/// maintain an index of unique functions, and provide a means of iterating /// maintain an index of unique functions, and provide a means of iterating
/// through all the instrumented call stacks which we know about. /// through all the instrumented call stacks which we know about.
struct TrieNode { struct StackDuration {
int32_t FuncId; llvm::SmallVector<int64_t, 4> TerminalDurations;
TrieNode *Parent; llvm::SmallVector<int64_t, 4> IntermediateDurations;
SmallVector<TrieNode *, 4> Callees;
// Separate durations depending on whether the node is the deepest node in the
// stack.
SmallVector<int64_t, 4> TerminalDurations;
SmallVector<int64_t, 4> IntermediateDurations;
}; };
/// Merges together two TrieNodes with like function ids, aggregating their StackDuration mergeStackDuration(const StackDuration &Left,
/// callee lists and durations. The caller must provide storage where new merged const StackDuration &Right) {
/// nodes can be allocated in the form of a linked list. StackDuration Data{};
TrieNode *mergeTrieNodes(const TrieNode &Left, const TrieNode &Right, Data.TerminalDurations.reserve(Left.TerminalDurations.size() +
TrieNode *NewParent, Right.TerminalDurations.size());
std::forward_list<TrieNode> &NodeStore) { Data.IntermediateDurations.reserve(Left.IntermediateDurations.size() +
assert(Left.FuncId == Right.FuncId); Right.IntermediateDurations.size());
NodeStore.push_front(TrieNode{Left.FuncId, NewParent, {}, {}, {}});
auto I = NodeStore.begin();
auto *Node = &*I;
// Build a map of callees from the left side.
DenseMap<int32_t, TrieNode *> LeftCalleesByFuncId;
for (auto *Callee : Left.Callees) {
LeftCalleesByFuncId[Callee->FuncId] = Callee;
}
// Iterate through the right side, either merging with the map values or
// directly adding to the Callees vector. The iteration also removes any
// merged values from the left side map.
for (auto *Callee : Right.Callees) {
auto iter = LeftCalleesByFuncId.find(Callee->FuncId);
if (iter != LeftCalleesByFuncId.end()) {
Node->Callees.push_back(
mergeTrieNodes(*(iter->second), *Callee, Node, NodeStore));
LeftCalleesByFuncId.erase(iter);
} else {
Node->Callees.push_back(Callee);
}
}
// Add any callees that weren't found in the right side.
for (auto MapPairIter : LeftCalleesByFuncId) {
Node->Callees.push_back(MapPairIter.second);
}
// Aggregate the durations. // Aggregate the durations.
for (auto duration : Left.TerminalDurations) { for (auto duration : Left.TerminalDurations)
Node->TerminalDurations.push_back(duration); Data.TerminalDurations.push_back(duration);
} for (auto duration : Right.TerminalDurations)
for (auto duration : Right.TerminalDurations) { Data.TerminalDurations.push_back(duration);
Node->TerminalDurations.push_back(duration);
}
for (auto duration : Left.IntermediateDurations) {
Node->IntermediateDurations.push_back(duration);
}
for (auto duration : Right.IntermediateDurations) {
Node->IntermediateDurations.push_back(duration);
}
return Node; for (auto duration : Left.IntermediateDurations)
Data.IntermediateDurations.push_back(duration);
for (auto duration : Right.IntermediateDurations)
Data.IntermediateDurations.push_back(duration);
return Data;
} }
using StackTrieNode = TrieNode<StackDuration>;
template <AggregationType AggType> template <AggregationType AggType>
std::size_t GetValueForStack(const TrieNode *Node); std::size_t GetValueForStack(const StackTrieNode *Node);
// When computing total time spent in a stack, we're adding the timings from // When computing total time spent in a stack, we're adding the timings from
// its callees and the timings from when it was a leaf. // its callees and the timings from when it was a leaf.
template <> template <>
std::size_t std::size_t
GetValueForStack<AggregationType::TOTAL_TIME>(const TrieNode *Node) { GetValueForStack<AggregationType::TOTAL_TIME>(const StackTrieNode *Node) {
auto TopSum = std::accumulate(Node->TerminalDurations.begin(), auto TopSum = std::accumulate(Node->ExtraData.TerminalDurations.begin(),
Node->TerminalDurations.end(), 0uLL); Node->ExtraData.TerminalDurations.end(), 0uLL);
return std::accumulate(Node->IntermediateDurations.begin(), return std::accumulate(Node->ExtraData.IntermediateDurations.begin(),
Node->IntermediateDurations.end(), TopSum); Node->ExtraData.IntermediateDurations.end(), TopSum);
} }
// Calculates how many times a function was invoked. // Calculates how many times a function was invoked.
// TODO: Hook up option to produce stacks // TODO: Hook up option to produce stacks
template <> template <>
std::size_t std::size_t
GetValueForStack<AggregationType::INVOCATION_COUNT>(const TrieNode *Node) { GetValueForStack<AggregationType::INVOCATION_COUNT>(const StackTrieNode *Node) {
return Node->TerminalDurations.size() + Node->IntermediateDurations.size(); return Node->ExtraData.TerminalDurations.size() +
Node->ExtraData.IntermediateDurations.size();
} }
// Make sure there are implementations for each enum value. // Make sure there are implementations for each enum value.
template <AggregationType T> struct DependentFalseType : std::false_type {}; template <AggregationType T> struct DependentFalseType : std::false_type {};
template <AggregationType AggType> template <AggregationType AggType>
std::size_t GetValueForStack(const TrieNode *Node) { std::size_t GetValueForStack(const StackTrieNode *Node) {
static_assert(DependentFalseType<AggType>::value, static_assert(DependentFalseType<AggType>::value,
"No implementation found for aggregation type provided."); "No implementation found for aggregation type provided.");
return 0; return 0;
@ -353,21 +319,21 @@ std::size_t GetValueForStack(const TrieNode *Node) {
class StackTrie { class StackTrie {
// Avoid the magic number of 4 propagated through the code with an alias. // Avoid the magic number of 4 propagated through the code with an alias.
// We use this SmallVector to track the root nodes in a call graph. // We use this SmallVector to track the root nodes in a call graph.
using RootVector = SmallVector<TrieNode *, 4>; using RootVector = SmallVector<StackTrieNode *, 4>;
// We maintain pointers to the roots of the tries we see. // We maintain pointers to the roots of the tries we see.
DenseMap<uint32_t, RootVector> Roots; DenseMap<uint32_t, RootVector> Roots;
// We make sure all the nodes are accounted for in this list. // We make sure all the nodes are accounted for in this list.
std::forward_list<TrieNode> NodeStore; std::forward_list<StackTrieNode> NodeStore;
// A map of thread ids to pairs call stack trie nodes and their start times. // A map of thread ids to pairs call stack trie nodes and their start times.
DenseMap<uint32_t, SmallVector<std::pair<TrieNode *, uint64_t>, 8>> DenseMap<uint32_t, SmallVector<std::pair<StackTrieNode *, uint64_t>, 8>>
ThreadStackMap; ThreadStackMap;
TrieNode *createTrieNode(uint32_t ThreadId, int32_t FuncId, StackTrieNode *createTrieNode(uint32_t ThreadId, int32_t FuncId,
TrieNode *Parent) { StackTrieNode *Parent) {
NodeStore.push_front(TrieNode{FuncId, Parent, {}, {}, {}}); NodeStore.push_front(StackTrieNode{FuncId, Parent, {}, {{}, {}}});
auto I = NodeStore.begin(); auto I = NodeStore.begin();
auto *Node = &*I; auto *Node = &*I;
if (!Parent) if (!Parent)
@ -375,10 +341,10 @@ class StackTrie {
return Node; return Node;
} }
TrieNode *findRootNode(uint32_t ThreadId, int32_t FuncId) { StackTrieNode *findRootNode(uint32_t ThreadId, int32_t FuncId) {
const auto &RootsByThread = Roots[ThreadId]; const auto &RootsByThread = Roots[ThreadId];
auto I = find_if(RootsByThread, auto I = find_if(RootsByThread,
[&](TrieNode *N) { return N->FuncId == FuncId; }); [&](StackTrieNode *N) { return N->FuncId == FuncId; });
return (I == RootsByThread.end()) ? nullptr : *I; return (I == RootsByThread.end()) ? nullptr : *I;
} }
@ -416,7 +382,7 @@ public:
auto &Top = TS.back(); auto &Top = TS.back();
auto I = find_if(Top.first->Callees, auto I = find_if(Top.first->Callees,
[&](TrieNode *N) { return N->FuncId == R.FuncId; }); [&](StackTrieNode *N) { return N->FuncId == R.FuncId; });
if (I == Top.first->Callees.end()) { if (I == Top.first->Callees.end()) {
// We didn't find the callee in the stack trie, so we're going to // We didn't find the callee in the stack trie, so we're going to
// add to the stack then set up the pointers properly. // add to the stack then set up the pointers properly.
@ -447,8 +413,8 @@ public:
return AccountRecordStatus::ENTRY_NOT_FOUND; return AccountRecordStatus::ENTRY_NOT_FOUND;
} }
auto FunctionEntryMatch = auto FunctionEntryMatch = find_if(
find_if(reverse(TS), [&](const std::pair<TrieNode *, uint64_t> &E) { reverse(TS), [&](const std::pair<StackTrieNode *, uint64_t> &E) {
return E.first->FuncId == R.FuncId; return E.first->FuncId == R.FuncId;
}); });
auto status = AccountRecordStatus::OK; auto status = AccountRecordStatus::OK;
@ -461,14 +427,14 @@ public:
} }
auto I = FunctionEntryMatch.base(); auto I = FunctionEntryMatch.base();
for (auto &E : make_range(I, TS.end() - 1)) for (auto &E : make_range(I, TS.end() - 1))
E.first->IntermediateDurations.push_back(std::max(E.second, R.TSC) - E.first->ExtraData.IntermediateDurations.push_back(
std::min(E.second, R.TSC)); std::max(E.second, R.TSC) - std::min(E.second, R.TSC));
auto &Deepest = TS.back(); auto &Deepest = TS.back();
if (wasLastRecordExit) if (wasLastRecordExit)
Deepest.first->IntermediateDurations.push_back( Deepest.first->ExtraData.IntermediateDurations.push_back(
std::max(Deepest.second, R.TSC) - std::min(Deepest.second, R.TSC)); std::max(Deepest.second, R.TSC) - std::min(Deepest.second, R.TSC));
else else
Deepest.first->TerminalDurations.push_back( Deepest.first->ExtraData.TerminalDurations.push_back(
std::max(Deepest.second, R.TSC) - std::min(Deepest.second, R.TSC)); std::max(Deepest.second, R.TSC) - std::min(Deepest.second, R.TSC));
TS.erase(I, TS.end()); TS.erase(I, TS.end());
return status; return status;
@ -479,11 +445,11 @@ public:
bool isEmpty() const { return Roots.empty(); } bool isEmpty() const { return Roots.empty(); }
void printStack(raw_ostream &OS, const TrieNode *Top, void printStack(raw_ostream &OS, const StackTrieNode *Top,
FuncIdConversionHelper &FN) { FuncIdConversionHelper &FN) {
// Traverse the pointers up to the parent, noting the sums, then print // Traverse the pointers up to the parent, noting the sums, then print
// in reverse order (callers at top, callees down bottom). // in reverse order (callers at top, callees down bottom).
SmallVector<const TrieNode *, 8> CurrentStack; SmallVector<const StackTrieNode *, 8> CurrentStack;
for (auto *F = Top; F != nullptr; F = F->Parent) for (auto *F = Top; F != nullptr; F = F->Parent)
CurrentStack.push_back(F); CurrentStack.push_back(F);
int Level = 0; int Level = 0;
@ -491,21 +457,22 @@ public:
"count", "sum"); "count", "sum");
for (auto *F : for (auto *F :
reverse(make_range(CurrentStack.begin() + 1, CurrentStack.end()))) { reverse(make_range(CurrentStack.begin() + 1, CurrentStack.end()))) {
auto Sum = std::accumulate(F->IntermediateDurations.begin(), auto Sum = std::accumulate(F->ExtraData.IntermediateDurations.begin(),
F->IntermediateDurations.end(), 0LL); F->ExtraData.IntermediateDurations.end(), 0LL);
auto FuncId = FN.SymbolOrNumber(F->FuncId); auto FuncId = FN.SymbolOrNumber(F->FuncId);
OS << formatv("#{0,-4} {1,-60} {2,+12} {3,+16}\n", Level++, OS << formatv("#{0,-4} {1,-60} {2,+12} {3,+16}\n", Level++,
FuncId.size() > 60 ? FuncId.substr(0, 57) + "..." : FuncId, FuncId.size() > 60 ? FuncId.substr(0, 57) + "..." : FuncId,
F->IntermediateDurations.size(), Sum); F->ExtraData.IntermediateDurations.size(), Sum);
} }
auto *Leaf = *CurrentStack.begin(); auto *Leaf = *CurrentStack.begin();
auto LeafSum = std::accumulate(Leaf->TerminalDurations.begin(), auto LeafSum =
Leaf->TerminalDurations.end(), 0LL); std::accumulate(Leaf->ExtraData.TerminalDurations.begin(),
Leaf->ExtraData.TerminalDurations.end(), 0LL);
auto LeafFuncId = FN.SymbolOrNumber(Leaf->FuncId); auto LeafFuncId = FN.SymbolOrNumber(Leaf->FuncId);
OS << formatv("#{0,-4} {1,-60} {2,+12} {3,+16}\n", Level++, OS << formatv("#{0,-4} {1,-60} {2,+12} {3,+16}\n", Level++,
LeafFuncId.size() > 60 ? LeafFuncId.substr(0, 57) + "..." LeafFuncId.size() > 60 ? LeafFuncId.substr(0, 57) + "..."
: LeafFuncId, : LeafFuncId,
Leaf->TerminalDurations.size(), LeafSum); Leaf->ExtraData.TerminalDurations.size(), LeafSum);
OS << "\n"; OS << "\n";
} }
@ -552,20 +519,20 @@ public:
/// Creates a merged list of Tries for unique stacks that disregards their /// Creates a merged list of Tries for unique stacks that disregards their
/// thread IDs. /// thread IDs.
RootVector mergeAcrossThreads(std::forward_list<TrieNode> &NodeStore) { RootVector mergeAcrossThreads(std::forward_list<StackTrieNode> &NodeStore) {
RootVector MergedByThreadRoots; RootVector MergedByThreadRoots;
for (auto MapIter : Roots) { for (auto MapIter : Roots) {
const auto &RootNodeVector = MapIter.second; const auto &RootNodeVector = MapIter.second;
for (auto *Node : RootNodeVector) { for (auto *Node : RootNodeVector) {
auto MaybeFoundIter = auto MaybeFoundIter =
find_if(MergedByThreadRoots, [Node](TrieNode *elem) { find_if(MergedByThreadRoots, [Node](StackTrieNode *elem) {
return Node->FuncId == elem->FuncId; return Node->FuncId == elem->FuncId;
}); });
if (MaybeFoundIter == MergedByThreadRoots.end()) { if (MaybeFoundIter == MergedByThreadRoots.end()) {
MergedByThreadRoots.push_back(Node); MergedByThreadRoots.push_back(Node);
} else { } else {
MergedByThreadRoots.push_back( MergedByThreadRoots.push_back(mergeTrieNodes(
mergeTrieNodes(**MaybeFoundIter, *Node, nullptr, NodeStore)); **MaybeFoundIter, *Node, nullptr, NodeStore, mergeStackDuration));
MergedByThreadRoots.erase(MaybeFoundIter); MergedByThreadRoots.erase(MaybeFoundIter);
} }
} }
@ -577,7 +544,7 @@ public:
template <AggregationType AggType> template <AggregationType AggType>
void printAllAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN, void printAllAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN,
StackOutputFormat format) { StackOutputFormat format) {
std::forward_list<TrieNode> AggregatedNodeStore; std::forward_list<StackTrieNode> AggregatedNodeStore;
RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore); RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore);
bool reportThreadId = false; bool reportThreadId = false;
printAll<AggType>(OS, FN, MergedByThreadRoots, printAll<AggType>(OS, FN, MergedByThreadRoots,
@ -586,7 +553,7 @@ public:
/// Merges the trie by thread id before printing top stacks. /// Merges the trie by thread id before printing top stacks.
void printAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN) { void printAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN) {
std::forward_list<TrieNode> AggregatedNodeStore; std::forward_list<StackTrieNode> AggregatedNodeStore;
RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore); RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore);
print(OS, FN, MergedByThreadRoots); print(OS, FN, MergedByThreadRoots);
} }
@ -595,7 +562,7 @@ public:
template <AggregationType AggType> template <AggregationType AggType>
void printAll(raw_ostream &OS, FuncIdConversionHelper &FN, void printAll(raw_ostream &OS, FuncIdConversionHelper &FN,
RootVector RootValues, uint32_t ThreadId, bool ReportThread) { RootVector RootValues, uint32_t ThreadId, bool ReportThread) {
SmallVector<const TrieNode *, 16> S; SmallVector<const StackTrieNode *, 16> S;
for (const auto *N : RootValues) { for (const auto *N : RootValues) {
S.clear(); S.clear();
S.push_back(N); S.push_back(N);
@ -616,10 +583,10 @@ public:
template <AggregationType AggType> template <AggregationType AggType>
void printSingleStack(raw_ostream &OS, FuncIdConversionHelper &Converter, void printSingleStack(raw_ostream &OS, FuncIdConversionHelper &Converter,
bool ReportThread, uint32_t ThreadId, bool ReportThread, uint32_t ThreadId,
const TrieNode *Node) { const StackTrieNode *Node) {
if (ReportThread) if (ReportThread)
OS << "thread_" << ThreadId << ";"; OS << "thread_" << ThreadId << ";";
SmallVector<const TrieNode *, 5> lineage{}; SmallVector<const StackTrieNode *, 5> lineage{};
lineage.push_back(Node); lineage.push_back(Node);
while (lineage.back()->Parent != nullptr) while (lineage.back()->Parent != nullptr)
lineage.push_back(lineage.back()->Parent); lineage.push_back(lineage.back()->Parent);
@ -639,15 +606,17 @@ public:
// - Total number of unique stacks // - Total number of unique stacks
// - Top 10 stacks by count // - Top 10 stacks by count
// - Top 10 stacks by aggregate duration // - Top 10 stacks by aggregate duration
SmallVector<std::pair<const TrieNode *, uint64_t>, 11> TopStacksByCount; SmallVector<std::pair<const StackTrieNode *, uint64_t>, 11>
SmallVector<std::pair<const TrieNode *, uint64_t>, 11> TopStacksBySum; TopStacksByCount;
auto greater_second = [](const std::pair<const TrieNode *, uint64_t> &A, SmallVector<std::pair<const StackTrieNode *, uint64_t>, 11> TopStacksBySum;
const std::pair<const TrieNode *, uint64_t> &B) { auto greater_second =
[](const std::pair<const StackTrieNode *, uint64_t> &A,
const std::pair<const StackTrieNode *, uint64_t> &B) {
return A.second > B.second; return A.second > B.second;
}; };
uint64_t UniqueStacks = 0; uint64_t UniqueStacks = 0;
for (const auto *N : RootValues) { for (const auto *N : RootValues) {
SmallVector<const TrieNode *, 16> S; SmallVector<const StackTrieNode *, 16> S;
S.emplace_back(N); S.emplace_back(N);
while (!S.empty()) { while (!S.empty()) {
@ -655,10 +624,11 @@ public:
// We only start printing the stack (by walking up the parent pointers) // We only start printing the stack (by walking up the parent pointers)
// when we get to a leaf function. // when we get to a leaf function.
if (!Top->TerminalDurations.empty()) { if (!Top->ExtraData.TerminalDurations.empty()) {
++UniqueStacks; ++UniqueStacks;
auto TopSum = std::accumulate(Top->TerminalDurations.begin(), auto TopSum =
Top->TerminalDurations.end(), 0uLL); std::accumulate(Top->ExtraData.TerminalDurations.begin(),
Top->ExtraData.TerminalDurations.end(), 0uLL);
{ {
auto E = std::make_pair(Top, TopSum); auto E = std::make_pair(Top, TopSum);
TopStacksBySum.insert(std::lower_bound(TopStacksBySum.begin(), TopStacksBySum.insert(std::lower_bound(TopStacksBySum.begin(),
@ -669,7 +639,8 @@ public:
TopStacksBySum.pop_back(); TopStacksBySum.pop_back();
} }
{ {
auto E = std::make_pair(Top, Top->TerminalDurations.size()); auto E =
std::make_pair(Top, Top->ExtraData.TerminalDurations.size());
TopStacksByCount.insert(std::lower_bound(TopStacksByCount.begin(), TopStacksByCount.insert(std::lower_bound(TopStacksByCount.begin(),
TopStacksByCount.end(), E, TopStacksByCount.end(), E,
greater_second), greater_second),