From 424279958d14995a0ccedcb3e8723047743fe538 Mon Sep 17 00:00:00 2001 From: Keith Wyss Date: Tue, 7 Nov 2017 00:28:28 +0000 Subject: [PATCH] [XRay] Minimal tool to convert xray traces to Chrome's Trace Event Format. Minimal tool to convert xray traces to Chrome's Trace Event Format. Summary: Make use of Chrome Trace Event format's Duration events and stack frame dict to produce Json files that chrome://tracing can visualize from xray function call traces. Trace Event format is more robust and has several features like argument logging, function categorization, multi process traces, etc. that we can add as needed. Duration events cover an important base case. Part of this change is rearranging the code so that the TrieNode data structure can be used from multiple tools and can carry parameterized baggage on the nodes. I put the actual behavior changes in llvm-xray convert exclusively. Exploring the trace of instrumented llc was pretty nifty if overwhelming. I can envision this being very useful for analyzing contention scenarios or tuning parameters like batch sizes in a producer consumer queue. For more targeted traces like this, let's talk about how we want to approach trace pruning. 
Reviewers: dberris, pelikan Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D39362 llvm-svn: 317531 --- llvm/tools/llvm-xray/trie-node.h | 92 ++++++++++++ llvm/tools/llvm-xray/xray-converter.cc | 198 ++++++++++++++++++++++++- llvm/tools/llvm-xray/xray-converter.h | 7 +- llvm/tools/llvm-xray/xray-stacks.cc | 189 ++++++++++------------- 4 files changed, 374 insertions(+), 112 deletions(-) create mode 100644 llvm/tools/llvm-xray/trie-node.h diff --git a/llvm/tools/llvm-xray/trie-node.h b/llvm/tools/llvm-xray/trie-node.h new file mode 100644 index 000000000000..e6ba4e215b91 --- /dev/null +++ b/llvm/tools/llvm-xray/trie-node.h @@ -0,0 +1,92 @@ +//===- trie-node.h - XRay Call Stack Data Structure -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides a data structure and routines for working with call stacks +// of instrumented functions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_XRAY_STACK_TRIE_H +#define LLVM_TOOLS_LLVM_XRAY_STACK_TRIE_H + +#include +#include + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" + +/// A type to represent a trie of invocations. It is useful to construct a +/// graph of these nodes from reading an XRay trace, such that each function +/// call can be placed in a larger context. +/// +/// The template parameter allows users of the template to attach their own +/// data elements to each node in the invocation graph. +template struct TrieNode { + /// The function ID. + int32_t FuncId; + + /// The caller of this function. + TrieNode *Parent; + + /// The callees from this function. 
+ llvm::SmallVector *, 4> Callees; + + /// Additional parameterized data on each node. + AssociatedData ExtraData; +}; + +/// Merges together two TrieNodes with like function ids, aggregating their +/// callee lists and durations. The caller must provide storage where new merged +/// nodes can be allocated in the form of a linked list. +template +TrieNode * +mergeTrieNodes(const TrieNode &Left, const TrieNode &Right, + /*Non-deduced pointer type for nullptr compatibility*/ + typename std::remove_reference *>::type NewParent, + std::forward_list> &NodeStore, + Callable &&MergeCallable) { + llvm::function_ref MergeFn( + std::forward(MergeCallable)); + assert(Left.FuncId == Right.FuncId); + NodeStore.push_front(TrieNode{ + Left.FuncId, NewParent, {}, MergeFn(Left.ExtraData, Right.ExtraData)}); + auto I = NodeStore.begin(); + auto *Node = &*I; + + // Build a map of callees from the left side. + llvm::DenseMap *> LeftCalleesByFuncId; + for (auto *Callee : Left.Callees) { + LeftCalleesByFuncId[Callee->FuncId] = Callee; + } + + // Iterate through the right side, either merging with the map values or + // directly adding to the Callees vector. The iteration also removes any + // merged values from the left side map. + // TODO: Unroll into iterative and explicit stack for efficiency. + for (auto *Callee : Right.Callees) { + auto iter = LeftCalleesByFuncId.find(Callee->FuncId); + if (iter != LeftCalleesByFuncId.end()) { + Node->Callees.push_back( + mergeTrieNodes(*(iter->second), *Callee, Node, NodeStore, MergeFn)); + LeftCalleesByFuncId.erase(iter); + } else { + Node->Callees.push_back(Callee); + } + } + + // Add any callees that weren't found in the right side. 
+ for (auto MapPairIter : LeftCalleesByFuncId) { + Node->Callees.push_back(MapPairIter.second); + } + + return Node; +} + +#endif // LLVM_TOOLS_LLVM_XRAY_STACK_TRIE_H diff --git a/llvm/tools/llvm-xray/xray-converter.cc b/llvm/tools/llvm-xray/xray-converter.cc index f1aec65bc675..aa0da55207b3 100644 --- a/llvm/tools/llvm-xray/xray-converter.cc +++ b/llvm/tools/llvm-xray/xray-converter.cc @@ -12,10 +12,12 @@ //===----------------------------------------------------------------------===// #include "xray-converter.h" +#include "trie-node.h" #include "xray-registry.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/YAMLTraits.h" #include "llvm/Support/raw_ostream.h" @@ -32,11 +34,14 @@ static cl::SubCommand Convert("convert", "Trace Format Conversion"); static cl::opt ConvertInput(cl::Positional, cl::desc(""), cl::Required, cl::sub(Convert)); -enum class ConvertFormats { BINARY, YAML }; +enum class ConvertFormats { BINARY, YAML, CHROME_TRACE_EVENT }; static cl::opt ConvertOutputFormat( "output-format", cl::desc("output format"), cl::values(clEnumValN(ConvertFormats::BINARY, "raw", "output in binary"), - clEnumValN(ConvertFormats::YAML, "yaml", "output in yaml")), + clEnumValN(ConvertFormats::YAML, "yaml", "output in yaml"), + clEnumValN(ConvertFormats::CHROME_TRACE_EVENT, "trace_event", + "Output in chrome's trace event format. " + "May be visualized with the Catapult trace viewer.")), cl::sub(Convert)); static cl::alias ConvertOutputFormat2("f", cl::aliasopt(ConvertOutputFormat), cl::desc("Alias for -output-format"), @@ -142,6 +147,192 @@ void TraceConverter::exportAsRAWv1(const Trace &Records, raw_ostream &OS) { } } +namespace { + +// A structure that allows building a dictionary of stack ids for the Chrome +// trace event format. 
+struct StackIdData { + // Each Stack of function calls has a unique ID. + unsigned id; + + // Bookkeeping so that IDs can be maintained uniquely across threads. + // Traversal keeps sibling pointers to other threads stacks. This is helpful + // to determine when a thread encounters a new stack and should assign a new + // unique ID. + SmallVector *, 4> siblings; +}; + +using StackTrieNode = TrieNode; + +// A helper function to find the sibling nodes for an encountered function in a +// thread of execution. Relies on the invariant that each time a new node is +// traversed in a thread, sibling bidirectional pointers are maintained. +SmallVector +findSiblings(StackTrieNode *parent, int32_t FnId, uint32_t TId, + const DenseMap> + &StackRootsByThreadId) { + + SmallVector Siblings{}; + + if (parent == nullptr) { + for (auto map_iter : StackRootsByThreadId) { + // Only look for siblings in other threads. + if (map_iter.first != TId) + for (auto node_iter : map_iter.second) { + if (node_iter->FuncId == FnId) + Siblings.push_back(node_iter); + } + } + return Siblings; + } + + for (auto *ParentSibling : parent->ExtraData.siblings) + for (auto node_iter : ParentSibling->Callees) + if (node_iter->FuncId == FnId) + Siblings.push_back(node_iter); + + return Siblings; +} + +// Given a function being invoked in a thread with id TId, finds and returns the +// StackTrie representing the function call stack. If no node exists, creates +// the node. Assigns unique IDs to stacks newly encountered among all threads +// and keeps sibling links up to when creating new nodes. +StackTrieNode *findOrCreateStackNode( + StackTrieNode *Parent, int32_t FuncId, uint32_t TId, + DenseMap> &StackRootsByThreadId, + DenseMap &StacksByStackId, unsigned *id_counter, + std::forward_list &NodeStore) { + SmallVector &ParentCallees = + Parent == nullptr ? 
StackRootsByThreadId[TId] : Parent->Callees; + auto match = find_if(ParentCallees, [FuncId](StackTrieNode *ParentCallee) { + return FuncId == ParentCallee->FuncId; + }); + if (match != ParentCallees.end()) + return *match; + + SmallVector siblings = + findSiblings(Parent, FuncId, TId, StackRootsByThreadId); + if (siblings.empty()) { + NodeStore.push_front({FuncId, Parent, {}, {(*id_counter)++, {}}}); + StackTrieNode *CurrentStack = &NodeStore.front(); + StacksByStackId[*id_counter - 1] = CurrentStack; + ParentCallees.push_back(CurrentStack); + return CurrentStack; + } + unsigned stack_id = siblings[0]->ExtraData.id; + NodeStore.push_front({FuncId, Parent, {}, {stack_id, std::move(siblings)}}); + StackTrieNode *CurrentStack = &NodeStore.front(); + for (auto *sibling : CurrentStack->ExtraData.siblings) + sibling->ExtraData.siblings.push_back(CurrentStack); + ParentCallees.push_back(CurrentStack); + return CurrentStack; +} + +void writeTraceViewerRecord(raw_ostream &OS, int32_t FuncId, uint32_t TId, + bool Symbolize, + const FuncIdConversionHelper &FuncIdHelper, + double EventTimestampUs, + const StackTrieNode &StackCursor, + StringRef FunctionPhenotype) { + OS << " "; + OS << llvm::formatv( + R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "1", )" + R"("ts" : "{3:f3}", "sf" : "{4}" })", + (Symbolize ? 
FuncIdHelper.SymbolOrNumber(FuncId) + : llvm::to_string(FuncId)), + FunctionPhenotype, TId, EventTimestampUs, StackCursor.ExtraData.id); +} + +} // namespace + +void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records, + raw_ostream &OS) { + const auto &FH = Records.getFileHeader(); + auto CycleFreq = FH.CycleFrequency; + + unsigned id_counter = 0; + + OS << "{\n \"traceEvents\": ["; + DenseMap StackCursorByThreadId{}; + DenseMap> StackRootsByThreadId{}; + DenseMap StacksByStackId{}; + std::forward_list NodeStore{}; + int loop_count = 0; + for (const auto &R : Records) { + if (loop_count++ == 0) + OS << "\n"; + else + OS << ",\n"; + + // Chrome trace event format always wants data in micros. + // CyclesPerMicro = CycleHertz / 10^6 + // TSC / CyclesPerMicro == TSC * 10^6 / CycleHertz == MicroTimestamp + // Could lose some precision here by converting the TSC to a double to + // multiply by the period in micros. 52 bit mantissa is a good start though. + // TODO: Make feature request to Chrome Trace viewer to accept ticks and a + // frequency or do some more involved calculation to avoid dangers of + // conversion. + double EventTimestampUs = double(1000000) / CycleFreq * double(R.TSC); + StackTrieNode *&StackCursor = StackCursorByThreadId[R.TId]; + switch (R.Type) { + case RecordTypes::ENTER: + case RecordTypes::ENTER_ARG: + StackCursor = findOrCreateStackNode(StackCursor, R.FuncId, R.TId, + StackRootsByThreadId, StacksByStackId, + &id_counter, NodeStore); + // Each record is represented as a json dictionary with function name, + // type of B for begin or E for end, thread id, process id (faked), + // timestamp in microseconds, and a stack frame id. The ids are logged + // in an id dictionary after the events. + writeTraceViewerRecord(OS, R.FuncId, R.TId, Symbolize, FuncIdHelper, + EventTimestampUs, *StackCursor, "B"); + break; + case RecordTypes::EXIT: + case RecordTypes::TAIL_EXIT: + // No entries to record end for. 
+ if (StackCursor == nullptr) + break; + // Should we emit an END record anyway or account this condition? + // (And/Or in loop termination below) + StackTrieNode *PreviousCursor = nullptr; + do { + writeTraceViewerRecord(OS, StackCursor->FuncId, R.TId, Symbolize, + FuncIdHelper, EventTimestampUs, *StackCursor, + "E"); + PreviousCursor = StackCursor; + StackCursor = StackCursor->Parent; + } while (PreviousCursor->FuncId != R.FuncId && StackCursor != nullptr); + break; + } + } + OS << "\n ],\n"; // Close the Trace Events array. + OS << " " + << "\"displayTimeUnit\": \"ns\",\n"; + + // The stackFrames dictionary substantially reduces size of the output file by + // avoiding repeating the entire call stack of function names for each entry. + OS << R"( "stackFrames": {)"; + int stack_frame_count = 0; + for (auto map_iter : StacksByStackId) { + if (stack_frame_count++ == 0) + OS << "\n"; + else + OS << ",\n"; + OS << " "; + OS << llvm::formatv( + R"("{0}" : { "name" : "{1}")", map_iter.first, + (Symbolize ? FuncIdHelper.SymbolOrNumber(map_iter.second->FuncId) + : llvm::to_string(map_iter.second->FuncId))); + if (map_iter.second->Parent != nullptr) + OS << llvm::formatv(R"(, "parent": "{0}")", + map_iter.second->Parent->ExtraData.id); + OS << " }"; + } + OS << "\n }\n"; // Close the stack frames map. + OS << "}\n"; // Close the JSON entry. 
+} + namespace llvm { namespace xray { @@ -191,6 +382,9 @@ static CommandRegistration Unused(&Convert, []() -> Error { case ConvertFormats::BINARY: TC.exportAsRAWv1(T, OS); break; + case ConvertFormats::CHROME_TRACE_EVENT: + TC.exportAsChromeTraceEventFormat(T, OS); + break; } return Error::success(); }); diff --git a/llvm/tools/llvm-xray/xray-converter.h b/llvm/tools/llvm-xray/xray-converter.h index fa0d5e132f14..5f0a3ee298eb 100644 --- a/llvm/tools/llvm-xray/xray-converter.h +++ b/llvm/tools/llvm-xray/xray-converter.h @@ -15,8 +15,8 @@ #define LLVM_TOOLS_LLVM_XRAY_XRAY_CONVERTER_H #include "func-id-helper.h" -#include "llvm/XRay/XRayRecord.h" #include "llvm/XRay/Trace.h" +#include "llvm/XRay/XRayRecord.h" namespace llvm { namespace xray { @@ -31,6 +31,11 @@ public: void exportAsYAML(const Trace &Records, raw_ostream &OS); void exportAsRAWv1(const Trace &Records, raw_ostream &OS); + + /// For this conversion, the Function records within each thread are expected + /// to be in sorted TSC order. The trace event format encodes stack traces, so + /// the linear history is essential for correct output. + void exportAsChromeTraceEventFormat(const Trace &Records, raw_ostream &OS); }; } // namespace xray diff --git a/llvm/tools/llvm-xray/xray-stacks.cc b/llvm/tools/llvm-xray/xray-stacks.cc index fd5df82e093d..9474de047990 100644 --- a/llvm/tools/llvm-xray/xray-stacks.cc +++ b/llvm/tools/llvm-xray/xray-stacks.cc @@ -19,6 +19,7 @@ #include #include "func-id-helper.h" +#include "trie-node.h" #include "xray-registry.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" @@ -255,96 +256,61 @@ private: /// maintain an index of unique functions, and provide a means of iterating /// through all the instrumented call stacks which we know about. -struct TrieNode { - int32_t FuncId; - TrieNode *Parent; - SmallVector Callees; - // Separate durations depending on whether the node is the deepest node in the - // stack. 
- SmallVector TerminalDurations; - SmallVector IntermediateDurations; +struct StackDuration { + llvm::SmallVector TerminalDurations; + llvm::SmallVector IntermediateDurations; }; -/// Merges together two TrieNodes with like function ids, aggregating their -/// callee lists and durations. The caller must provide storage where new merged -/// nodes can be allocated in the form of a linked list. -TrieNode *mergeTrieNodes(const TrieNode &Left, const TrieNode &Right, - TrieNode *NewParent, - std::forward_list &NodeStore) { - assert(Left.FuncId == Right.FuncId); - NodeStore.push_front(TrieNode{Left.FuncId, NewParent, {}, {}, {}}); - auto I = NodeStore.begin(); - auto *Node = &*I; - - // Build a map of callees from the left side. - DenseMap LeftCalleesByFuncId; - for (auto *Callee : Left.Callees) { - LeftCalleesByFuncId[Callee->FuncId] = Callee; - } - - // Iterate through the right side, either merging with the map values or - // directly adding to the Callees vector. The iteration also removes any - // merged values from the left side map. - for (auto *Callee : Right.Callees) { - auto iter = LeftCalleesByFuncId.find(Callee->FuncId); - if (iter != LeftCalleesByFuncId.end()) { - Node->Callees.push_back( - mergeTrieNodes(*(iter->second), *Callee, Node, NodeStore)); - LeftCalleesByFuncId.erase(iter); - } else { - Node->Callees.push_back(Callee); - } - } - - // Add any callees that weren't found in the right side. - for (auto MapPairIter : LeftCalleesByFuncId) { - Node->Callees.push_back(MapPairIter.second); - } - +StackDuration mergeStackDuration(const StackDuration &Left, + const StackDuration &Right) { + StackDuration Data{}; + Data.TerminalDurations.reserve(Left.TerminalDurations.size() + + Right.TerminalDurations.size()); + Data.IntermediateDurations.reserve(Left.IntermediateDurations.size() + + Right.IntermediateDurations.size()); // Aggregate the durations. 
- for (auto duration : Left.TerminalDurations) { - Node->TerminalDurations.push_back(duration); - } - for (auto duration : Right.TerminalDurations) { - Node->TerminalDurations.push_back(duration); - } - for (auto duration : Left.IntermediateDurations) { - Node->IntermediateDurations.push_back(duration); - } - for (auto duration : Right.IntermediateDurations) { - Node->IntermediateDurations.push_back(duration); - } + for (auto duration : Left.TerminalDurations) + Data.TerminalDurations.push_back(duration); + for (auto duration : Right.TerminalDurations) + Data.TerminalDurations.push_back(duration); - return Node; + for (auto duration : Left.IntermediateDurations) + Data.IntermediateDurations.push_back(duration); + for (auto duration : Right.IntermediateDurations) + Data.IntermediateDurations.push_back(duration); + return Data; } +using StackTrieNode = TrieNode; + template -std::size_t GetValueForStack(const TrieNode *Node); +std::size_t GetValueForStack(const StackTrieNode *Node); // When computing total time spent in a stack, we're adding the timings from // its callees and the timings from when it was a leaf. template <> std::size_t -GetValueForStack(const TrieNode *Node) { - auto TopSum = std::accumulate(Node->TerminalDurations.begin(), - Node->TerminalDurations.end(), 0uLL); - return std::accumulate(Node->IntermediateDurations.begin(), - Node->IntermediateDurations.end(), TopSum); +GetValueForStack(const StackTrieNode *Node) { + auto TopSum = std::accumulate(Node->ExtraData.TerminalDurations.begin(), + Node->ExtraData.TerminalDurations.end(), 0uLL); + return std::accumulate(Node->ExtraData.IntermediateDurations.begin(), + Node->ExtraData.IntermediateDurations.end(), TopSum); } // Calculates how many times a function was invoked. 
// TODO: Hook up option to produce stacks template <> std::size_t -GetValueForStack(const TrieNode *Node) { - return Node->TerminalDurations.size() + Node->IntermediateDurations.size(); +GetValueForStack(const StackTrieNode *Node) { + return Node->ExtraData.TerminalDurations.size() + + Node->ExtraData.IntermediateDurations.size(); } // Make sure there are implementations for each enum value. template struct DependentFalseType : std::false_type {}; template -std::size_t GetValueForStack(const TrieNode *Node) { +std::size_t GetValueForStack(const StackTrieNode *Node) { static_assert(DependentFalseType::value, "No implementation found for aggregation type provided."); return 0; @@ -353,21 +319,21 @@ std::size_t GetValueForStack(const TrieNode *Node) { class StackTrie { // Avoid the magic number of 4 propagated through the code with an alias. // We use this SmallVector to track the root nodes in a call graph. - using RootVector = SmallVector; + using RootVector = SmallVector; // We maintain pointers to the roots of the tries we see. DenseMap Roots; // We make sure all the nodes are accounted for in this list. - std::forward_list NodeStore; + std::forward_list NodeStore; // A map of thread ids to pairs call stack trie nodes and their start times. 
- DenseMap, 8>> + DenseMap, 8>> ThreadStackMap; - TrieNode *createTrieNode(uint32_t ThreadId, int32_t FuncId, - TrieNode *Parent) { - NodeStore.push_front(TrieNode{FuncId, Parent, {}, {}, {}}); + StackTrieNode *createTrieNode(uint32_t ThreadId, int32_t FuncId, + StackTrieNode *Parent) { + NodeStore.push_front(StackTrieNode{FuncId, Parent, {}, {{}, {}}}); auto I = NodeStore.begin(); auto *Node = &*I; if (!Parent) @@ -375,10 +341,10 @@ class StackTrie { return Node; } - TrieNode *findRootNode(uint32_t ThreadId, int32_t FuncId) { + StackTrieNode *findRootNode(uint32_t ThreadId, int32_t FuncId) { const auto &RootsByThread = Roots[ThreadId]; auto I = find_if(RootsByThread, - [&](TrieNode *N) { return N->FuncId == FuncId; }); + [&](StackTrieNode *N) { return N->FuncId == FuncId; }); return (I == RootsByThread.end()) ? nullptr : *I; } @@ -416,7 +382,7 @@ public: auto &Top = TS.back(); auto I = find_if(Top.first->Callees, - [&](TrieNode *N) { return N->FuncId == R.FuncId; }); + [&](StackTrieNode *N) { return N->FuncId == R.FuncId; }); if (I == Top.first->Callees.end()) { // We didn't find the callee in the stack trie, so we're going to // add to the stack then set up the pointers properly. 
@@ -447,8 +413,8 @@ public: return AccountRecordStatus::ENTRY_NOT_FOUND; } - auto FunctionEntryMatch = - find_if(reverse(TS), [&](const std::pair &E) { + auto FunctionEntryMatch = find_if( + reverse(TS), [&](const std::pair &E) { return E.first->FuncId == R.FuncId; }); auto status = AccountRecordStatus::OK; @@ -461,14 +427,14 @@ public: } auto I = FunctionEntryMatch.base(); for (auto &E : make_range(I, TS.end() - 1)) - E.first->IntermediateDurations.push_back(std::max(E.second, R.TSC) - - std::min(E.second, R.TSC)); + E.first->ExtraData.IntermediateDurations.push_back( + std::max(E.second, R.TSC) - std::min(E.second, R.TSC)); auto &Deepest = TS.back(); if (wasLastRecordExit) - Deepest.first->IntermediateDurations.push_back( + Deepest.first->ExtraData.IntermediateDurations.push_back( std::max(Deepest.second, R.TSC) - std::min(Deepest.second, R.TSC)); else - Deepest.first->TerminalDurations.push_back( + Deepest.first->ExtraData.TerminalDurations.push_back( std::max(Deepest.second, R.TSC) - std::min(Deepest.second, R.TSC)); TS.erase(I, TS.end()); return status; @@ -479,11 +445,11 @@ public: bool isEmpty() const { return Roots.empty(); } - void printStack(raw_ostream &OS, const TrieNode *Top, + void printStack(raw_ostream &OS, const StackTrieNode *Top, FuncIdConversionHelper &FN) { // Traverse the pointers up to the parent, noting the sums, then print // in reverse order (callers at top, callees down bottom). 
- SmallVector CurrentStack; + SmallVector CurrentStack; for (auto *F = Top; F != nullptr; F = F->Parent) CurrentStack.push_back(F); int Level = 0; @@ -491,21 +457,22 @@ public: "count", "sum"); for (auto *F : reverse(make_range(CurrentStack.begin() + 1, CurrentStack.end()))) { - auto Sum = std::accumulate(F->IntermediateDurations.begin(), - F->IntermediateDurations.end(), 0LL); + auto Sum = std::accumulate(F->ExtraData.IntermediateDurations.begin(), + F->ExtraData.IntermediateDurations.end(), 0LL); auto FuncId = FN.SymbolOrNumber(F->FuncId); OS << formatv("#{0,-4} {1,-60} {2,+12} {3,+16}\n", Level++, FuncId.size() > 60 ? FuncId.substr(0, 57) + "..." : FuncId, - F->IntermediateDurations.size(), Sum); + F->ExtraData.IntermediateDurations.size(), Sum); } auto *Leaf = *CurrentStack.begin(); - auto LeafSum = std::accumulate(Leaf->TerminalDurations.begin(), - Leaf->TerminalDurations.end(), 0LL); + auto LeafSum = + std::accumulate(Leaf->ExtraData.TerminalDurations.begin(), + Leaf->ExtraData.TerminalDurations.end(), 0LL); auto LeafFuncId = FN.SymbolOrNumber(Leaf->FuncId); OS << formatv("#{0,-4} {1,-60} {2,+12} {3,+16}\n", Level++, LeafFuncId.size() > 60 ? LeafFuncId.substr(0, 57) + "..." : LeafFuncId, - Leaf->TerminalDurations.size(), LeafSum); + Leaf->ExtraData.TerminalDurations.size(), LeafSum); OS << "\n"; } @@ -552,20 +519,20 @@ public: /// Creates a merged list of Tries for unique stacks that disregards their /// thread IDs. 
- RootVector mergeAcrossThreads(std::forward_list &NodeStore) { + RootVector mergeAcrossThreads(std::forward_list &NodeStore) { RootVector MergedByThreadRoots; for (auto MapIter : Roots) { const auto &RootNodeVector = MapIter.second; for (auto *Node : RootNodeVector) { auto MaybeFoundIter = - find_if(MergedByThreadRoots, [Node](TrieNode *elem) { + find_if(MergedByThreadRoots, [Node](StackTrieNode *elem) { return Node->FuncId == elem->FuncId; }); if (MaybeFoundIter == MergedByThreadRoots.end()) { MergedByThreadRoots.push_back(Node); } else { - MergedByThreadRoots.push_back( - mergeTrieNodes(**MaybeFoundIter, *Node, nullptr, NodeStore)); + MergedByThreadRoots.push_back(mergeTrieNodes( + **MaybeFoundIter, *Node, nullptr, NodeStore, mergeStackDuration)); MergedByThreadRoots.erase(MaybeFoundIter); } } @@ -577,7 +544,7 @@ public: template void printAllAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN, StackOutputFormat format) { - std::forward_list AggregatedNodeStore; + std::forward_list AggregatedNodeStore; RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore); bool reportThreadId = false; printAll(OS, FN, MergedByThreadRoots, @@ -586,7 +553,7 @@ public: /// Merges the trie by thread id before printing top stacks. 
void printAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN) { - std::forward_list AggregatedNodeStore; + std::forward_list AggregatedNodeStore; RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore); print(OS, FN, MergedByThreadRoots); } @@ -595,7 +562,7 @@ public: template void printAll(raw_ostream &OS, FuncIdConversionHelper &FN, RootVector RootValues, uint32_t ThreadId, bool ReportThread) { - SmallVector S; + SmallVector S; for (const auto *N : RootValues) { S.clear(); S.push_back(N); @@ -616,10 +583,10 @@ public: template void printSingleStack(raw_ostream &OS, FuncIdConversionHelper &Converter, bool ReportThread, uint32_t ThreadId, - const TrieNode *Node) { + const StackTrieNode *Node) { if (ReportThread) OS << "thread_" << ThreadId << ";"; - SmallVector lineage{}; + SmallVector lineage{}; lineage.push_back(Node); while (lineage.back()->Parent != nullptr) lineage.push_back(lineage.back()->Parent); @@ -639,15 +606,17 @@ public: // - Total number of unique stacks // - Top 10 stacks by count // - Top 10 stacks by aggregate duration - SmallVector, 11> TopStacksByCount; - SmallVector, 11> TopStacksBySum; - auto greater_second = [](const std::pair &A, - const std::pair &B) { - return A.second > B.second; - }; + SmallVector, 11> + TopStacksByCount; + SmallVector, 11> TopStacksBySum; + auto greater_second = + [](const std::pair &A, + const std::pair &B) { + return A.second > B.second; + }; uint64_t UniqueStacks = 0; for (const auto *N : RootValues) { - SmallVector S; + SmallVector S; S.emplace_back(N); while (!S.empty()) { @@ -655,10 +624,11 @@ public: // We only start printing the stack (by walking up the parent pointers) // when we get to a leaf function. 
- if (!Top->TerminalDurations.empty()) { + if (!Top->ExtraData.TerminalDurations.empty()) { ++UniqueStacks; - auto TopSum = std::accumulate(Top->TerminalDurations.begin(), - Top->TerminalDurations.end(), 0uLL); + auto TopSum = + std::accumulate(Top->ExtraData.TerminalDurations.begin(), + Top->ExtraData.TerminalDurations.end(), 0uLL); { auto E = std::make_pair(Top, TopSum); TopStacksBySum.insert(std::lower_bound(TopStacksBySum.begin(), @@ -669,7 +639,8 @@ public: TopStacksBySum.pop_back(); } { - auto E = std::make_pair(Top, Top->TerminalDurations.size()); + auto E = + std::make_pair(Top, Top->ExtraData.TerminalDurations.size()); TopStacksByCount.insert(std::lower_bound(TopStacksByCount.begin(), TopStacksByCount.end(), E, greater_second),