forked from OSchip/llvm-project
398 lines
13 KiB
C++
398 lines
13 KiB
C++
//===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Defines the XRay Profile class representing the latency profile generated by
// XRay's profiling mode.
//
//===----------------------------------------------------------------------===//
|
|
#include "llvm/XRay/Profile.h"

#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Process.h"
#include "llvm/XRay/Trace.h"
#include <deque>
#include <memory>
|
|
|
|
namespace llvm {
|
|
namespace xray {
|
|
|
|
// Copy constructor. Rather than copying O's trie nodes, every path recorded in
// O's blocks is expanded back into a list of function IDs and interned again
// into this Profile, so the copy owns its own trie and its own path IDs.
Profile::Profile(const Profile &O) {
  for (const auto &Source : O) {
    // Start with an empty block for the same thread and fill it in below.
    Blocks.push_back({Source.Thread, {}});
    auto &Copy = Blocks.back();
    for (const auto &Entry : Source.PathData) {
      // Entry.first was issued by O, so it has to be expanded through O before
      // being interned here.
      auto Expanded = cantFail(O.expandPath(Entry.first));
      Copy.PathData.push_back({internPath(Expanded), Entry.second});
    }
  }
}
|
|
|
|
// Copy assignment, implemented as "copy, then move-assign": a temporary copy of
// O is built with the copy constructor and then moved into *this.
// NOTE(review): this relies on a move-assignment operator being declared for
// Profile outside this file; otherwise the assignment below would recurse.
Profile &Profile::operator=(const Profile &O) {
  Profile Copy(O);
  *this = std::move(Copy);
  return *this;
}
|
|
|
|
namespace {
|
|
|
|
// Header that precedes each block in a serialized profile. The fields are
// listed in the order readBlockHeader() reads them from the input.
struct BlockHeader {
  // Read as an unsigned 32-bit value.
  uint32_t Size;
  // Read as an unsigned 32-bit value.
  uint32_t Number;
  // Read as an unsigned 64-bit value; loadProfile() uses it as the thread ID
  // of the block it adds.
  uint64_t Thread;
};
|
|
|
|
// Reads one BlockHeader (32-bit size, 32-bit number, 64-bit thread ID, in that
// order) from Extractor starting at Offset, advancing Offset past each field.
//
// A read that leaves Offset unchanged is treated as a failure, and an
// invalid_argument StringError naming the field and the offset is returned.
static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
                                             uint32_t &Offset) {
  BlockHeader Header;

  const uint32_t SizeAt = Offset;
  Header.Size = Extractor.getU32(&Offset);
  if (Offset == SizeAt)
    return make_error<StringError>(
        Twine("Error parsing block header size at offset '") + Twine(SizeAt) +
            "'",
        std::make_error_code(std::errc::invalid_argument));

  const uint32_t NumberAt = Offset;
  Header.Number = Extractor.getU32(&Offset);
  if (Offset == NumberAt)
    return make_error<StringError>(
        Twine("Error parsing block header number at offset '") +
            Twine(NumberAt) + "'",
        std::make_error_code(std::errc::invalid_argument));

  const uint32_t ThreadAt = Offset;
  Header.Thread = Extractor.getU64(&Offset);
  if (Offset == ThreadAt)
    return make_error<StringError>(
        Twine("Error parsing block header thread id at offset '") +
            Twine(ThreadAt) + "'",
        std::make_error_code(std::errc::invalid_argument));

  return Header;
}
|
|
|
|
// Reads a path from Extractor starting at Offset: a sequence of signed 32-bit
// function IDs that ends with a 0. Offset is advanced past everything read.
//
// The terminating 0 is itself appended to the returned vector. A read that
// leaves Offset unchanged yields an invalid_argument StringError.
static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor,
                                                       uint32_t &Offset) {
  std::vector<Profile::FuncID> Ids;
  for (;;) {
    const uint32_t Before = Offset;
    const int32_t Id = Extractor.getSigned(&Offset, 4);
    if (Offset == Before)
      return make_error<StringError>(
          Twine("Error parsing path at offset '") + Twine(Before) + "'",
          std::make_error_code(std::errc::invalid_argument));
    Ids.push_back(Id);
    // The terminator has already been stored above; stop after it.
    if (Id == 0)
      break;
  }
  return std::move(Ids);
}
|
|
|
|
// Reads the Profile::Data for one path from Extractor starting at Offset,
// advancing Offset past it. The record consists of two unsigned 64-bit values:
// the call count followed by the cumulative local time.
//
// A read that leaves Offset unchanged yields an invalid_argument StringError.
static Expected<Profile::Data> readData(DataExtractor &Extractor,
                                        uint32_t &Offset) {
  Profile::Data Result;

  const uint32_t CountAt = Offset;
  Result.CallCount = Extractor.getU64(&Offset);
  if (Offset == CountAt)
    return make_error<StringError>(
        Twine("Error parsing call counts at offset '") + Twine(CountAt) + "'",
        std::make_error_code(std::errc::invalid_argument));

  const uint32_t TimeAt = Offset;
  Result.CumulativeLocalTime = Extractor.getU64(&Offset);
  if (Offset == TimeAt)
    return make_error<StringError>(
        Twine("Error parsing cumulative local time at offset '") +
            Twine(TimeAt) + "'",
        std::make_error_code(std::errc::invalid_argument));

  return Result;
}
|
|
|
|
} // namespace
|
|
|
|
// Appends B to this Profile's list of blocks, taking ownership of its
// contents. A block with no path data is rejected with an invalid_argument
// StringError and nothing is stored.
Error Profile::addBlock(Block &&B) {
  if (!B.PathData.empty()) {
    Blocks.emplace_back(std::move(B));
    return Error::success();
  }

  return make_error<StringError>(
      "Block may not have empty path data.",
      std::make_error_code(std::errc::invalid_argument));
}
|
|
|
|
// Turns a path ID back into the list of function IDs it stands for. The list
// starts at the node registered for P and follows Caller links until there is
// no caller left, so it is ordered from that node up towards its root.
//
// Returns an invalid_argument StringError if P is not a known path ID.
Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const {
  const auto Found = PathIDMap.find(P);
  if (Found == PathIDMap.end())
    return make_error<StringError>(
        Twine("PathID not found: ") + Twine(P),
        std::make_error_code(std::errc::invalid_argument));

  std::vector<Profile::FuncID> Funcs;
  auto Current = Found->second;
  while (Current) {
    Funcs.push_back(Current->Func);
    Current = Current->Caller;
  }
  return std::move(Funcs);
}
|
|
|
|
// Interns the path P into this Profile's trie and returns its path ID.
//
// P is given leaf-first (P.front() is the leaf), so the trie is walked using
// the elements of P in reverse order. Missing nodes are created in
// NodeStorage along the way. The node reached at the end is assigned a fresh
// ID from NextID and registered in PathIDMap if its ID is still 0; otherwise
// its existing ID is returned. An empty path always yields 0.
Profile::PathID Profile::internPath(ArrayRef<FuncID> P) {
  if (P.empty())
    return 0;

  auto RootFirst = reverse(P);
  auto Cursor = RootFirst.begin();
  const auto End = RootFirst.end();

  // Locate the root for this path, creating it on first sight.
  const auto RootFunc = *Cursor;
  ++Cursor;
  auto FoundRoot = find_if(
      Roots, [RootFunc](TrieNode *N) { return N->Func == RootFunc; });

  TrieNode *Node = nullptr;
  if (FoundRoot != Roots.end()) {
    Node = *FoundRoot;
  } else {
    NodeStorage.emplace_back();
    Node = &NodeStorage.back();
    Node->Func = RootFunc;
    Roots.push_back(Node);
  }

  // Descend one callee at a time, adding nodes that do not exist yet.
  for (; Cursor != End; ++Cursor) {
    const auto CalleeFunc = *Cursor;
    auto FoundCallee = find_if(Node->Callees, [CalleeFunc](TrieNode *N) {
      return N->Func == CalleeFunc;
    });
    if (FoundCallee != Node->Callees.end()) {
      Node = *FoundCallee;
      continue;
    }

    NodeStorage.emplace_back();
    auto Child = &NodeStorage.back();
    Child->Func = CalleeFunc;
    Child->Caller = Node;
    Node->Callees.push_back(Child);
    Node = Child;
  }

  // Having consumed the whole path, Node has to be the leaf.
  assert(Node->Func == P.front());
  if (Node->ID == 0) {
    Node->ID = NextID++;
    PathIDMap.insert({Node->ID, Node});
  }
  return Node->ID;
}
|
|
|
|
// Merges L and R into a new Profile, keeping data separated by thread.
//
// Every path found in a block of L or R is expanded using the profile it came
// from and interned again into the result. Within one thread, data for the
// same (re-interned) path is combined by adding up CallCount and
// CumulativeLocalTime. The result holds one block per thread ID seen.
Profile mergeProfilesByThread(const Profile &L, const Profile &R) {
  Profile Merged;
  using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
  using PathDataMapPtr = std::unique_ptr<PathDataMap>;
  using PathDataVector = decltype(Profile::Block::PathData);
  using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>;
  ThreadProfileIndexMap ThreadProfileIndex;

  for (const auto &P : {std::ref(L), std::ref(R)})
    for (const auto &Block : P.get()) {
      // Look up (or create) the slot for this thread first and allocate the
      // per-thread map only when the slot is new. Building the map up front
      // would heap-allocate one for every block and throw it away again each
      // time the thread had already been indexed.
      PathDataMapPtr &ThreadData = ThreadProfileIndex[Block.Thread];
      if (!ThreadData)
        ThreadData.reset(new PathDataMap());

      for (const auto &PathAndData : Block.PathData) {
        const auto &SourcePathID = PathAndData.first;
        const auto &Data = PathAndData.second;
        // Path IDs were issued by the source profile, so translate through
        // the function-ID list into an ID that is valid in Merged.
        auto NewPathID =
            Merged.internPath(cantFail(P.get().expandPath(SourcePathID)));
        PathDataMap::iterator PathDataIt;
        bool Inserted;
        std::tie(PathDataIt, Inserted) = ThreadData->insert({NewPathID, Data});
        if (!Inserted) {
          // Path already recorded for this thread: accumulate.
          auto &ExistingData = PathDataIt->second;
          ExistingData.CallCount += Data.CallCount;
          ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
        }
      }
    }

  // Emit one block per indexed thread.
  for (const auto &IndexedThreadBlock : ThreadProfileIndex) {
    PathDataVector PathAndData;
    PathAndData.reserve(IndexedThreadBlock.second->size());
    copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData));
    cantFail(
        Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)}));
  }
  return Merged;
}
|
|
|
|
// Merges L and R into a new Profile, ignoring which thread the data came from.
//
// Every path found in a block of L or R is expanded using the profile it came
// from and interned again into the result; data for the same (re-interned)
// path is combined by adding up CallCount and CumulativeLocalTime. All of the
// combined data ends up in a single block with thread ID 0.
Profile mergeProfilesByStack(const Profile &L, const Profile &R) {
  using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
  using PathDataVector = decltype(Profile::Block::PathData);

  Profile Merged;
  PathDataMap Combined;

  for (const auto &Source : {std::ref(L), std::ref(R)}) {
    const Profile &From = Source.get();
    for (const auto &SourceBlock : From) {
      for (const auto &Entry : SourceBlock.PathData) {
        const auto &Data = Entry.second;
        // Re-express the source profile's path ID as one valid in Merged.
        auto NewPathID = Merged.internPath(cantFail(From.expandPath(Entry.first)));
        auto Result = Combined.insert({NewPathID, Data});
        if (Result.second)
          continue;

        // The path was seen before, so fold this entry into the stored totals.
        auto &Existing = Result.first->second;
        Existing.CallCount += Data.CallCount;
        Existing.CumulativeLocalTime += Data.CumulativeLocalTime;
      }
    }
  }

  // Everything goes into one block, attributed to thread 0.
  PathDataVector Flattened;
  Flattened.reserve(Combined.size());
  copy(Combined, std::back_inserter(Flattened));
  cantFail(Merged.addBlock({0, std::move(Flattened)}));
  return Merged;
}
|
|
|
|
// Loads a Profile from the file named Filename.
//
// The file is memory-mapped read-only and decoded as little-endian data. It is
// consumed as a sequence of records until the end of the mapping is reached;
// each record is a block header (see readBlockHeader), a 0-terminated path of
// function IDs (see readPath) and one Profile::Data (see readData). Every
// record becomes one block in the returned Profile, tagged with the thread ID
// from its header.
//
// Returns an error if the file cannot be opened, sized or mapped, if any
// record fails to parse, or if adding a block fails.
Expected<Profile> loadProfile(StringRef Filename) {
  int Fd;
  if (auto EC = sys::fs::openFileForRead(Filename, Fd))
    return make_error<StringError>(
        Twine("Cannot read profile from '") + Filename + "'", EC);

  // The descriptor was previously never closed, leaking one descriptor per
  // call on success and on every error path below. This guard closes it on
  // any exit from the function. It is declared before the mapping, so the
  // mapping is torn down first and the descriptor is closed afterwards.
  struct FdCloser {
    int Fd;
    ~FdCloser() { (void)sys::Process::SafelyCloseFileDescriptor(Fd); }
  } CloseFd{Fd};

  uint64_t FileSize;
  if (auto EC = sys::fs::file_size(Filename, FileSize))
    return make_error<StringError>(
        Twine("Cannot get filesize of '") + Filename + "'", EC);

  std::error_code EC;
  sys::fs::mapped_file_region MappedFile(
      Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
  if (EC)
    return make_error<StringError>(
        Twine("Cannot mmap profile '") + Filename + "'", EC);
  StringRef Data(MappedFile.data(), MappedFile.size());

  Profile P;
  uint32_t Offset = 0;
  DataExtractor Extractor(Data, true, 8);

  // For each block we get from the file:
  while (Offset != MappedFile.size()) {
    auto HeaderOrError = readBlockHeader(Extractor, Offset);
    if (!HeaderOrError)
      return HeaderOrError.takeError();

    // TODO: Maybe store this header information for each block, even just for
    // debugging?
    const auto &Header = HeaderOrError.get();

    // Read in the path data.
    auto PathOrError = readPath(Extractor, Offset);
    if (!PathOrError)
      return PathOrError.takeError();
    const auto &Path = PathOrError.get();

    // Read the counters recorded for that path.
    auto DataOrError = readData(Extractor, Offset);
    if (!DataOrError)
      return DataOrError.takeError();
    auto &PathData = DataOrError.get();

    // Intern the path to get a PathID and store the (PathID, Data) pair as a
    // block for the thread named in the header.
    if (auto E =
            P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread},
                                      {{P.internPath(Path), std::move(PathData)}}}))
      return std::move(E);
  }

  return P;
}
|
|
|
|
namespace {
|
|
|
|
struct StackEntry {
|
|
uint64_t Timestamp;
|
|
Profile::FuncID FuncId;
|
|
};
|
|
|
|
} // namespace
|
|
|
|
// Builds a Profile from the records of the Trace T.
//
// The records are replayed in order, with a separate call stack kept for each
// thread ID. ENTER and ENTER_ARG records push a frame. EXIT and TAIL_EXIT
// records pop frames until the frame for the exiting function has been popped
// (or the stack runs empty); for every popped frame the current stack is
// interned as a path and that path's call count and cumulative local time are
// updated for the thread. Afterwards one block per thread with recorded data
// is added to the Profile.
Expected<Profile> profileFromTrace(const Trace &T) {
  Profile P;

  // Per-thread execution state: the live call stack, and the data gathered so
  // far for each interned path.
  DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks;
  DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>>
      ThreadPathData;

  // First pass: account every record against its thread.
  for (const auto &E : T) {
    auto &Stack = ThreadStacks[E.TId];
    switch (E.Type) {
    case RecordTypes::ENTER:
    case RecordTypes::ENTER_ARG:
      // A function was entered: remember when, and which one.
      Stack.push_back({E.TSC, E.FuncId});
      break;

    case RecordTypes::EXIT:
    case RecordTypes::TAIL_EXIT:
      // Unwind until the frame matching this exit has been accounted for.
      // Frames above it are accounted for and dropped in the same way.
      for (bool Matched = false; !Matched && !Stack.empty();) {
        const auto Top = Stack.back();
        auto FunctionLocalTime = AbsoluteDifference(Top.Timestamp, E.TSC);

        // The path is the current stack, innermost frame first.
        SmallVector<Profile::FuncID, 16> Path;
        for (const auto &Frame : reverse(Stack))
          Path.push_back(Frame.FuncId);

        auto InternedPath = P.internPath(Path);
        auto &TPD = ThreadPathData[E.TId][InternedPath];
        ++TPD.CallCount;
        TPD.CumulativeLocalTime += FunctionLocalTime;
        Stack.pop_back();

        // Stop once the entry corresponding to this exit has been popped.
        Matched = Top.FuncId == E.FuncId;

        // FIXME: Consider the intermediate times and the cumulative tree time
        // as well.
      }
      break;
    }
  }

  // Second pass: turn each thread's accumulated data into one Block.
  for (const auto &ThreadPaths : ThreadPathData) {
    const auto &PathsData = ThreadPaths.second;
    std::vector<std::pair<Profile::PathID, Profile::Data>> Entries(
        PathsData.begin(), PathsData.end());
    if (auto E = P.addBlock({ThreadPaths.first, std::move(Entries)}))
      return std::move(E);
  }

  return P;
}
|
|
|
|
} // namespace xray
|
|
} // namespace llvm
|