//===-- function_call_trie_test.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a function call tracing system.
//
//===----------------------------------------------------------------------===//
#include "xray_function_call_trie.h"
#include "gtest/gtest.h"
#include <cstdint>
namespace __xray {
namespace {
TEST(FunctionCallTrieTest, ConstructWithTLSAllocators) {
profilingFlags()->setDefaults();
FunctionCallTrie::Allocators Allocators = FunctionCallTrie::InitAllocators();
FunctionCallTrie Trie(Allocators);
}

TEST(FunctionCallTrieTest, EnterAndExitFunction) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  uint64_t TSC = 1;
  uint16_t CPU = 0;
  Trie.enterFunction(1, TSC++, CPU++);
  Trie.exitFunction(1, TSC++, CPU++);
  const auto &R = Trie.getRoots();
  ASSERT_EQ(R.size(), 1u);
  ASSERT_EQ(R.front()->FId, 1);
  ASSERT_EQ(R.front()->CallCount, 1u);
  ASSERT_EQ(R.front()->CumulativeLocalTime, 1u);
}
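
// A minimal companion sketch of the same single enter/exit scenario, checking
// that a lone call produces a root with no callees. It assumes nothing beyond
// the Node members (FId, Callees) already exercised elsewhere in this file.
TEST(FunctionCallTrieTest, SingleCallHasNoCallees) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  Trie.enterFunction(1, 1, 0);
  Trie.exitFunction(1, 2, 0);
  const auto &R = Trie.getRoots();
  ASSERT_EQ(R.size(), 1u);
  EXPECT_EQ(R.front()->FId, 1);
  // A single enter/exit pair never pushes a nested call, so the root node
  // should have no callees.
  EXPECT_TRUE(R.front()->Callees.empty());
}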

TEST(FunctionCallTrieTest, HandleTSCOverflow) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  Trie.enterFunction(1, std::numeric_limits<uint64_t>::max(), 0);
  Trie.exitFunction(1, 1, 0);
  const auto &R = Trie.getRoots();
  ASSERT_EQ(R.size(), 1u);
  ASSERT_EQ(R.front()->FId, 1);
  ASSERT_EQ(R.front()->CallCount, 1u);
  ASSERT_EQ(R.front()->CumulativeLocalTime, 1u);
}

TEST(FunctionCallTrieTest, MaximalCumulativeTime) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  Trie.enterFunction(1, 1, 0);
  Trie.exitFunction(1, 0, 0);
  const auto &R = Trie.getRoots();
  ASSERT_EQ(R.size(), 1u);
  ASSERT_EQ(R.front()->FId, 1);
  ASSERT_EQ(R.front()->CallCount, 1u);
  ASSERT_EQ(R.front()->CumulativeLocalTime,
            std::numeric_limits<uint64_t>::max() - 1);
}
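
// A note on the two TSC corner cases above: both expected values are
// consistent with the trie accounting a wrapped interval as
// (std::numeric_limits<uint64_t>::max() - EntryTSC) + ExitTSC. Entering at the
// maximum TSC and exiting at 1 then yields a local time of 1, while entering
// at 1 and exiting at 0 yields the near-maximal delta of
// std::numeric_limits<uint64_t>::max() - 1.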

TEST(FunctionCallTrieTest, MissingFunctionEntry) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  Trie.exitFunction(1, 1, 0);
  const auto &R = Trie.getRoots();
  ASSERT_TRUE(R.empty());
}

TEST(FunctionCallTrieTest, NoMatchingEntersForExit) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  Trie.enterFunction(2, 1, 0);
  Trie.enterFunction(3, 3, 0);
  Trie.exitFunction(1, 5, 0);
  const auto &R = Trie.getRoots();
  ASSERT_FALSE(R.empty());
  EXPECT_EQ(R.size(), size_t{1});
}

TEST(FunctionCallTrieTest, MissingFunctionExit) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  Trie.enterFunction(1, 1, 0);
  const auto &R = Trie.getRoots();
  ASSERT_FALSE(R.empty());
  EXPECT_EQ(R.size(), size_t{1});
}

TEST(FunctionCallTrieTest, MultipleRoots) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  // Enter and exit FId = 1.
  Trie.enterFunction(1, 1, 0);
  Trie.exitFunction(1, 2, 0);

  // Enter and exit FId = 2.
  Trie.enterFunction(2, 3, 0);
  Trie.exitFunction(2, 4, 0);

  const auto &R = Trie.getRoots();
  ASSERT_FALSE(R.empty());
  ASSERT_EQ(R.size(), 2u);

  // Make sure the roots have different IDs.
  const auto R0 = R[0];
  const auto R1 = R[1];
  ASSERT_NE(R0->FId, R1->FId);

  // Inspect the roots and check that they carry the right data.
  ASSERT_NE(R0, nullptr);
  EXPECT_EQ(R0->CallCount, 1u);
  EXPECT_EQ(R0->CumulativeLocalTime, 1u);

  ASSERT_NE(R1, nullptr);
  EXPECT_EQ(R1->CallCount, 1u);
  EXPECT_EQ(R1->CumulativeLocalTime, 1u);
}

// While missing an intermediary exit may be rare in practice, we still enforce
// that we can handle the case where we've missed an exit event somewhere in
// between call entries/exits. To illustrate, imagine the following shadow call
// stack:
//
//   f0@t0 -> f1@t1 -> f2@t2
//
// If for whatever reason we see an exit for `f2` @ t3, followed by an exit for
// `f0` @ t4 (i.e. no `f1` exit in between) then we need to handle the case of
// accounting local time to `f2` as d = (t3 - t2), local time to `f1` as
// d' = (t4 - t1) since its exit was never observed, and local time to `f0` as
// d'' = (t4 - t0) - d'.
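//
// Concretely, with the timestamps used in the test below (t0 = 0, t1 = 100,
// t2 = 200, t3 = 300, t4 = 400), this accounting charges d = 100 to `f2`,
// d' = 300 to `f1`, and d'' = (400 - 0) - 300 = 100 to `f0`, matching the
// expectations below for FIds 3, 2, and 1 respectively.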
TEST(FunctionCallTrieTest, MissingIntermediaryExit) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  Trie.enterFunction(1, 0, 0);
  Trie.enterFunction(2, 100, 0);
  Trie.enterFunction(3, 200, 0);
  Trie.exitFunction(3, 300, 0);
  Trie.exitFunction(1, 400, 0);

  // What we should see at this point is all the functions in the trie in a
  // specific order (1 -> 2 -> 3) with the appropriate count(s) and local
  // latencies.
  const auto &R = Trie.getRoots();
  ASSERT_FALSE(R.empty());
  ASSERT_EQ(R.size(), 1u);

  const auto &F1 = *R[0];
  ASSERT_EQ(F1.FId, 1);
  ASSERT_FALSE(F1.Callees.empty());

  const auto &F2 = *F1.Callees[0].NodePtr;
  ASSERT_EQ(F2.FId, 2);
  ASSERT_FALSE(F2.Callees.empty());

  const auto &F3 = *F2.Callees[0].NodePtr;
  ASSERT_EQ(F3.FId, 3);
  ASSERT_TRUE(F3.Callees.empty());

  // Now that we've established the preconditions, we check for specific
  // aspects of the nodes.
  EXPECT_EQ(F3.CallCount, 1u);
  EXPECT_EQ(F2.CallCount, 1u);
  EXPECT_EQ(F1.CallCount, 1u);
  EXPECT_EQ(F3.CumulativeLocalTime, 100u);
  EXPECT_EQ(F2.CumulativeLocalTime, 300u);
  EXPECT_EQ(F1.CumulativeLocalTime, 100u);
}

TEST(FunctionCallTrieTest, DeepCallStack) {
  // Simulate a relatively deep call stack (32 levels) and ensure that we can
  // properly pop all the way up the stack.
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  for (int i = 0; i < 32; ++i)
    Trie.enterFunction(i + 1, i, 0);
  Trie.exitFunction(1, 33, 0);

  // Here, validate that we have a 32-level deep function call path from the
  // root (FId 1) down to the leaf (FId 32).
  const auto &R = Trie.getRoots();
  ASSERT_EQ(R.size(), 1u);
  auto F = R[0];
  for (int i = 0; i < 32; ++i) {
    EXPECT_EQ(F->FId, i + 1);
    EXPECT_EQ(F->CallCount, 1u);
    if (F->Callees.empty() && i != 31)
      FAIL() << "Empty callees for FId " << F->FId;
    if (i != 31)
      F = F->Callees[0].NodePtr;
  }
}

// TODO: Test that we can handle cross-CPU migrations, where TSCs are not
// guaranteed to be synchronised.

TEST(FunctionCallTrieTest, DeepCopy) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  Trie.enterFunction(1, 0, 0);
  Trie.enterFunction(2, 1, 0);
  Trie.exitFunction(2, 2, 0);
  Trie.enterFunction(3, 3, 0);
  Trie.exitFunction(3, 4, 0);
  Trie.exitFunction(1, 5, 0);

  // We want to make a deep copy and compare notes.
  auto B = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Copy(B);
  Trie.deepCopyInto(Copy);

  ASSERT_NE(Trie.getRoots().size(), 0u);
  ASSERT_EQ(Trie.getRoots().size(), Copy.getRoots().size());
  const auto &R0Orig = *Trie.getRoots()[0];
  const auto &R0Copy = *Copy.getRoots()[0];
  EXPECT_EQ(R0Orig.FId, 1);
  EXPECT_EQ(R0Orig.FId, R0Copy.FId);

  ASSERT_EQ(R0Orig.Callees.size(), 2u);
  ASSERT_EQ(R0Copy.Callees.size(), 2u);

  const auto &F1Orig =
      *R0Orig.Callees
           .find_element(
               [](const FunctionCallTrie::NodeIdPair &R) { return R.FId == 2; })
           ->NodePtr;
  const auto &F1Copy =
      *R0Copy.Callees
           .find_element(
               [](const FunctionCallTrie::NodeIdPair &R) { return R.FId == 2; })
           ->NodePtr;
  EXPECT_EQ(&R0Orig, F1Orig.Parent);
  EXPECT_EQ(&R0Copy, F1Copy.Parent);
}

TEST(FunctionCallTrieTest, MergeInto) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie T0(A);
  FunctionCallTrie T1(A);

  // 1 -> 2 -> 3
  T0.enterFunction(1, 0, 0);
  T0.enterFunction(2, 1, 0);
  T0.enterFunction(3, 2, 0);
  T0.exitFunction(3, 3, 0);
  T0.exitFunction(2, 4, 0);
  T0.exitFunction(1, 5, 0);

  // 1 -> 2 -> 3
  T1.enterFunction(1, 0, 0);
  T1.enterFunction(2, 1, 0);
  T1.enterFunction(3, 2, 0);
  T1.exitFunction(3, 3, 0);
  T1.exitFunction(2, 4, 0);
  T1.exitFunction(1, 5, 0);

  // We use a different allocator here to make sure that we're able to transfer
  // data into a FunctionCallTrie which uses a different allocator. This
  // reflects the intended usage scenario for when we're collecting profiles
  // that aggregate across threads.
  auto B = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Merged(B);
  T0.mergeInto(Merged);
  T1.mergeInto(Merged);
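
  // Each input trie attributes a local time of 5 to FId 1 (exit at 5, entry at
  // 0), 3 to FId 2 (exit at 4, entry at 1), and 1 to FId 3 (exit at 3, entry
  // at 2). Merging the two identical tries should therefore double these to
  // 10, 6, and 2, and double each call count to 2.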
  ASSERT_EQ(Merged.getRoots().size(), 1u);
  const auto &R0 = *Merged.getRoots()[0];
  EXPECT_EQ(R0.FId, 1);
  EXPECT_EQ(R0.CallCount, 2u);
  EXPECT_EQ(R0.CumulativeLocalTime, 10u);
  EXPECT_EQ(R0.Callees.size(), 1u);

  const auto &F1 = *R0.Callees[0].NodePtr;
  EXPECT_EQ(F1.FId, 2);
  EXPECT_EQ(F1.CallCount, 2u);
  EXPECT_EQ(F1.CumulativeLocalTime, 6u);
  EXPECT_EQ(F1.Callees.size(), 1u);

  const auto &F2 = *F1.Callees[0].NodePtr;
  EXPECT_EQ(F2.FId, 3);
  EXPECT_EQ(F2.CallCount, 2u);
  EXPECT_EQ(F2.CumulativeLocalTime, 2u);
  EXPECT_EQ(F2.Callees.size(), 0u);
}

TEST(FunctionCallTrieTest, PlacementNewOnAlignedStorage) {
  profilingFlags()->setDefaults();
  typename std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
                                alignof(FunctionCallTrie::Allocators)>::type
      AllocatorsStorage;
  new (&AllocatorsStorage)
      FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
  auto *A =
      reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage);

  typename std::aligned_storage<sizeof(FunctionCallTrie),
                                alignof(FunctionCallTrie)>::type FCTStorage;
  new (&FCTStorage) FunctionCallTrie(*A);
  auto *T = reinterpret_cast<FunctionCallTrie *>(&FCTStorage);

  // Put some data into it.
  T->enterFunction(1, 0, 0);
  T->exitFunction(1, 1, 0);

  // Re-initialize the objects in storage.
  T->~FunctionCallTrie();
  A->~Allocators();
  new (A) FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
  new (T) FunctionCallTrie(*A);

  // Then put some data into it again.
  T->enterFunction(1, 0, 0);
  T->exitFunction(1, 1, 0);
}
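
// The re-initialization pattern above can also be written without
// std::aligned_storage (deprecated in C++23), using an aligned byte buffer; a
// minimal sketch, relying only on the FunctionCallTrie constructor used above:
//
//   alignas(FunctionCallTrie) unsigned char Buf[sizeof(FunctionCallTrie)];
//   auto *T = new (Buf) FunctionCallTrie(*A);
//   T->~FunctionCallTrie();
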
} // namespace
} // namespace __xray