[XRay][compiler-rt] Simplify Allocator Implementation
Summary:
This change simplifies the XRay Allocator implementation to self-manage an mmap'ed memory segment instead of using the internal allocator implementation in sanitizer_common.

We've found through benchmarks and profiling these benchmarks in D48879 that using the internal allocator in sanitizer_common introduces a bottleneck on allocating memory through a central spinlock. This change allows thread-local allocators to eliminate contention on the centralized allocator.

To get the most benefit from this approach, we also use a managed allocator for the chunk elements used by the segmented array implementation. This gives us the chance to amortize the cost of allocating memory when creating these internal segmented array data structures.

We also took the opportunity to remove the preallocation argument from the allocator API, simplifying the usage of the allocator throughout the profiling implementation.

In this change we also tweak some of the flag values to reduce the amount of maximum memory we use/need for each thread, when requesting memory through mmap.

Depends on D48956.

Reviewers: kpw, eizan

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D49217

llvm-svn: 337342
This commit is contained in:
parent 1e3feb49e3
commit 9d6b7a5f2b
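The core idea in the summary, that each thread-local allocator lazily mmap()s its own bounded segment and hands out fixed-size, cache-line-rounded blocks from it, can be sketched in a few lines. The snippet below is illustrative only and is not the code from this commit: the names BumpSegmentAllocator and kCacheLine are invented for the example, and the real Allocator in xray_allocator.h additionally serializes allocation with a SpinMutex, wraps results in a Block struct, and goes through the sanitizer internal_mmap/internal_munmap wrappers.

#include <sys/mman.h>
#include <cstddef>

// Illustrative sketch: a bounded, mmap-backed bump allocator. Each instance
// owns its own segment, so per-thread instances never contend on a shared
// lock the way a single central allocator does.
template <size_t N> class BumpSegmentAllocator {
  static constexpr size_t kCacheLine = 64;
  // Round the block size up to a multiple of the cache line, as the summary
  // describes for the real allocator.
  static constexpr size_t kBlockSize =
      ((N + kCacheLine - 1) / kCacheLine) * kCacheLine;

  const size_t MaxMemory; // Hard cap on memory owned by this instance.
  char *Base = nullptr;   // Start of the lazily mmap'ed segment.
  size_t Used = 0;        // Bytes already handed out.

public:
  explicit BumpSegmentAllocator(size_t Max) : MaxMemory(Max) {}

  // Returns a kBlockSize-sized block, or nullptr when the initial mmap fails
  // or the instance is over quota. No central allocator is involved.
  void *Allocate() {
    if (Base == nullptr) {
      void *P = mmap(nullptr, MaxMemory, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (P == MAP_FAILED)
        return nullptr;
      Base = static_cast<char *>(P);
    }
    if (Used + kBlockSize > MaxMemory)
      return nullptr; // Refuse instead of growing past the configured cap.
    void *Result = Base + Used;
    Used += kBlockSize;
    return Result;
  }

  ~BumpSegmentAllocator() {
    if (Base != nullptr)
      munmap(Base, MaxMemory);
  }
};

Because each thread constructs its own allocator instances (see InitAllocators() in the FunctionCallTrie changes below), allocations stay thread-local, which is the spinlock contention the summary describes eliminating.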
@@ -22,16 +22,16 @@ struct TestData {
s64 Second;
};

TEST(AllocatorTest, Construction) { Allocator<sizeof(TestData)> A(2 << 11, 0); }
TEST(AllocatorTest, Construction) { Allocator<sizeof(TestData)> A(2 << 11); }

TEST(AllocatorTest, Allocate) {
Allocator<sizeof(TestData)> A(2 << 11, 0);
Allocator<sizeof(TestData)> A(2 << 11);
auto B = A.Allocate();
ASSERT_NE(B.Data, nullptr);
}

TEST(AllocatorTest, OverAllocate) {
Allocator<sizeof(TestData)> A(sizeof(TestData), 0);
Allocator<sizeof(TestData)> A(sizeof(TestData));
auto B1 = A.Allocate();
(void)B1;
auto B2 = A.Allocate();
@@ -19,8 +19,6 @@ namespace __xray {
namespace {

TEST(FunctionCallTrieTest, ConstructWithTLSAllocators) {
// FIXME: Support passing in configuration for allocators in the allocator
// constructors.
profilingFlags()->setDefaults();
FunctionCallTrie::Allocators Allocators = FunctionCallTrie::InitAllocators();
FunctionCallTrie Trie(Allocators);
@@ -47,6 +45,7 @@ TEST(FunctionCallTrieTest, EnterAndExitFunction) {
}

TEST(FunctionCallTrieTest, MissingFunctionEntry) {
profilingFlags()->setDefaults();
auto A = FunctionCallTrie::InitAllocators();
FunctionCallTrie Trie(A);
Trie.exitFunction(1, 1);
@@ -56,6 +55,7 @@ TEST(FunctionCallTrieTest, MissingFunctionEntry) {
}

TEST(FunctionCallTrieTest, NoMatchingEntersForExit) {
profilingFlags()->setDefaults();
auto A = FunctionCallTrie::InitAllocators();
FunctionCallTrie Trie(A);
Trie.enterFunction(2, 1);
@@ -63,16 +63,19 @@ TEST(FunctionCallTrieTest, NoMatchingEntersForExit) {
Trie.exitFunction(1, 5);
const auto &R = Trie.getRoots();

ASSERT_TRUE(R.empty());
ASSERT_FALSE(R.empty());
EXPECT_EQ(R.size(), size_t{1});
}

TEST(FunctionCallTrieTest, MissingFunctionExit) {
profilingFlags()->setDefaults();
auto A = FunctionCallTrie::InitAllocators();
FunctionCallTrie Trie(A);
Trie.enterFunction(1, 1);
const auto &R = Trie.getRoots();

ASSERT_TRUE(R.empty());
ASSERT_FALSE(R.empty());
EXPECT_EQ(R.size(), size_t{1});
}

TEST(FunctionCallTrieTest, MultipleRoots) {
@@ -12,27 +12,29 @@ struct TestData {
TestData(s64 F, s64 S) : First(F), Second(S) {}
};

TEST(SegmentedArrayTest, Construction) {
Array<TestData> Data;
(void)Data;
}

TEST(SegmentedArrayTest, ConstructWithAllocator) {
TEST(SegmentedArrayTest, ConstructWithAllocators) {
using AllocatorType = typename Array<TestData>::AllocatorType;
AllocatorType A(1 << 4, 0);
Array<TestData> Data(A);
AllocatorType A(1 << 4);
ChunkAllocator CA(1 << 4);
Array<TestData> Data(A, CA);
(void)Data;
}

TEST(SegmentedArrayTest, ConstructAndPopulate) {
Array<TestData> data;
using AllocatorType = typename Array<TestData>::AllocatorType;
AllocatorType A(1 << 4);
ChunkAllocator CA(1 << 4);
Array<TestData> data(A, CA);
ASSERT_NE(data.Append(TestData{0, 0}), nullptr);
ASSERT_NE(data.Append(TestData{1, 1}), nullptr);
ASSERT_EQ(data.size(), 2u);
}

TEST(SegmentedArrayTest, ConstructPopulateAndLookup) {
Array<TestData> data;
using AllocatorType = typename Array<TestData>::AllocatorType;
AllocatorType A(1 << 4);
ChunkAllocator CA(1 << 4);
Array<TestData> data(A, CA);
ASSERT_NE(data.Append(TestData{0, 1}), nullptr);
ASSERT_EQ(data.size(), 1u);
ASSERT_EQ(data[0].First, 0);
@@ -40,7 +42,10 @@ TEST(SegmentedArrayTest, ConstructPopulateAndLookup) {
}

TEST(SegmentedArrayTest, PopulateWithMoreElements) {
Array<TestData> data;
using AllocatorType = typename Array<TestData>::AllocatorType;
AllocatorType A(1 << 24);
ChunkAllocator CA(1 << 20);
Array<TestData> data(A, CA);
static const auto kMaxElements = 100u;
for (auto I = 0u; I < kMaxElements; ++I) {
ASSERT_NE(data.Append(TestData{I, I + 1}), nullptr);
@@ -53,14 +58,20 @@ TEST(SegmentedArrayTest, PopulateWithMoreElements) {
}

TEST(SegmentedArrayTest, AppendEmplace) {
Array<TestData> data;
using AllocatorType = typename Array<TestData>::AllocatorType;
AllocatorType A(1 << 4);
ChunkAllocator CA(1 << 4);
Array<TestData> data(A, CA);
ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
ASSERT_EQ(data[0].First, 1);
ASSERT_EQ(data[0].Second, 1);
}

TEST(SegmentedArrayTest, AppendAndTrim) {
Array<TestData> data;
using AllocatorType = typename Array<TestData>::AllocatorType;
AllocatorType A(1 << 4);
ChunkAllocator CA(1 << 4);
Array<TestData> data(A, CA);
ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
ASSERT_EQ(data.size(), 1u);
data.trim(1);
@@ -69,7 +80,10 @@ TEST(SegmentedArrayTest, AppendAndTrim) {
}

TEST(SegmentedArrayTest, IteratorAdvance) {
Array<TestData> data;
using AllocatorType = typename Array<TestData>::AllocatorType;
AllocatorType A(1 << 4);
ChunkAllocator CA(1 << 4);
Array<TestData> data(A, CA);
ASSERT_TRUE(data.empty());
ASSERT_EQ(data.begin(), data.end());
auto I0 = data.begin();
@@ -88,7 +102,10 @@ TEST(SegmentedArrayTest, IteratorAdvance) {
}

TEST(SegmentedArrayTest, IteratorRetreat) {
Array<TestData> data;
using AllocatorType = typename Array<TestData>::AllocatorType;
AllocatorType A(1 << 4);
ChunkAllocator CA(1 << 4);
Array<TestData> data(A, CA);
ASSERT_TRUE(data.empty());
ASSERT_EQ(data.begin(), data.end());
ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
@@ -108,8 +125,9 @@ TEST(SegmentedArrayTest, IteratorRetreat) {

TEST(SegmentedArrayTest, IteratorTrimBehaviour) {
using AllocatorType = typename Array<TestData>::AllocatorType;
AllocatorType A(1 << 10, 0);
Array<TestData> Data(A);
AllocatorType A(1 << 20);
ChunkAllocator CA(1 << 10);
Array<TestData> Data(A, CA);
ASSERT_TRUE(Data.empty());
auto I0Begin = Data.begin(), I0End = Data.end();
// Add enough elements in Data to have more than one chunk.
@@ -160,8 +178,9 @@ struct ShadowStackEntry {

TEST(SegmentedArrayTest, SimulateStackBehaviour) {
using AllocatorType = typename Array<ShadowStackEntry>::AllocatorType;
AllocatorType A(1 << 10, 0);
Array<ShadowStackEntry> Data(A);
AllocatorType A(1 << 10);
ChunkAllocator CA(1 << 10);
Array<ShadowStackEntry> Data(A, CA);
static uint64_t Dummy = 0;
constexpr uint64_t Max = 9;
@@ -16,10 +16,11 @@
#ifndef XRAY_ALLOCATOR_H
#define XRAY_ALLOCATOR_H

#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_internal_defs.h"
#include "sanitizer_common/sanitizer_mutex.h"
#include "sanitizer_common/sanitizer_posix.h"
#include "sys/mman.h"
#include "xray_utils.h"
#include <cstddef>
#include <cstdint>
@@ -36,130 +37,87 @@ namespace __xray {
/// allocation function. N is used to compute the size of a block, which is
/// cache-line-size multiples worth of memory. We compute the size of a block by
/// determining how many cache lines worth of memory is required to subsume N.
///
/// The Allocator instance will manage its own memory acquired through mmap.
/// This severely constrains the platforms on which this can be used to POSIX
/// systems where mmap semantics are well-defined.
///
/// FIXME: Isolate the lower-level memory management to a different abstraction
/// that can be platform-specific.
template <size_t N> struct Allocator {
// The Allocator returns memory as Block instances.
struct Block {
/// Compute the minimum cache-line size multiple that is >= N.
static constexpr auto Size = nearest_boundary(N, kCacheLineSize);
void *Data = nullptr;
void *Data;
};

private:
// A BlockLink will contain a fixed number of blocks, each with an identifier
// to specify whether it's been handed out or not. We keep track of BlockLink
// iterators, which are basically a pointer to the link and an offset into
// the fixed set of blocks associated with a link. The iterators are
// bidirectional.
//
// We're calling it a "link" in the context of seeing these as a chain of
// block pointer containers (i.e. links in a chain).
struct BlockLink {
static_assert(kCacheLineSize % sizeof(void *) == 0,
"Cache line size is not divisible by size of void*; none of "
"the assumptions of the BlockLink will hold.");

// We compute the number of pointers to areas in memory where we consider as
// individual blocks we've allocated. To ensure that instances of the
// BlockLink object are cache-line sized, we deduct two pointers worth
// representing the pointer to the previous link and the backing store for
// the whole block.
//
// This structure corresponds to the following layout:
//
// Blocks [ 0, 1, 2, .., BlockPtrCount - 2]
//
static constexpr auto BlockPtrCount =
(kCacheLineSize / sizeof(Block *)) - 2;

BlockLink() {
// Zero out Blocks.
// FIXME: Use a braced member initializer when we drop support for GCC
// 4.8.
internal_memset(Blocks, 0, sizeof(Blocks));
}

// FIXME: Align this to cache-line address boundaries?
Block Blocks[BlockPtrCount];
BlockLink *Prev = nullptr;
void *BackingStore = nullptr;
};

static_assert(sizeof(BlockLink) == kCacheLineSize,
"BlockLink instances must be cache-line-sized.");

static BlockLink NullLink;

// FIXME: Implement a freelist, in case we actually do intend to return memory
// to the allocator, as opposed to just de-allocating everything in one go?

size_t MaxMemory;
const size_t MaxMemory{0};
void *BackingStore = nullptr;
void *AlignedNextBlock = nullptr;
size_t AllocatedBlocks = 0;
SpinMutex Mutex{};
BlockLink *Tail = &NullLink;
size_t Counter = 0;

BlockLink *NewChainLink(uint64_t Alignment) {
auto NewChain = reinterpret_cast<BlockLink *>(
InternalAlloc(sizeof(BlockLink), nullptr, kCacheLineSize));
auto BackingStore = reinterpret_cast<char *>(InternalAlloc(
(BlockLink::BlockPtrCount + 1) * Block::Size, nullptr, Alignment));
size_t Offset = 0;
DCHECK_NE(NewChain, nullptr);
DCHECK_NE(BackingStore, nullptr);
NewChain->BackingStore = BackingStore;
void *Alloc() {
SpinMutexLock Lock(&Mutex);
if (UNLIKELY(BackingStore == nullptr)) {
BackingStore = reinterpret_cast<void *>(
internal_mmap(NULL, MaxMemory, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, 0, 0));
if (BackingStore == MAP_FAILED) {
BackingStore = nullptr;
if (Verbosity())
Report("XRay Profiling: Failed to allocate memory for allocator.\n");
return nullptr;
}

// Here we ensure that the alignment of the pointers we're handing out
// adhere to the alignment requirements of the call to Allocate().
for (auto &B : NewChain->Blocks) {
auto AlignmentAdjustment =
nearest_boundary(reinterpret_cast<uintptr_t>(BackingStore + Offset),
Alignment) -
reinterpret_cast<uintptr_t>(BackingStore + Offset);
B.Data = BackingStore + AlignmentAdjustment + Offset;
DCHECK_EQ(reinterpret_cast<uintptr_t>(B.Data) % Alignment, 0);
Offset += AlignmentAdjustment + Block::Size;
AlignedNextBlock = BackingStore;

// Ensure that NextBlock is aligned appropriately.
auto BackingStoreNum = reinterpret_cast<uintptr_t>(BackingStore);
auto AlignedNextBlockNum = nearest_boundary(
reinterpret_cast<uintptr_t>(AlignedNextBlock), kCacheLineSize);
if (diff(AlignedNextBlockNum, BackingStoreNum) > ptrdiff_t(MaxMemory)) {
munmap(BackingStore, MaxMemory);
AlignedNextBlock = BackingStore = nullptr;
if (Verbosity())
Report("XRay Profiling: Cannot obtain enough memory from "
"preallocated region.\n");
return nullptr;
}

AlignedNextBlock = reinterpret_cast<void *>(AlignedNextBlockNum);

// Assert that AlignedNextBlock is cache-line aligned.
DCHECK_EQ(reinterpret_cast<uintptr_t>(AlignedNextBlock) % kCacheLineSize,
0);
}
NewChain->Prev = Tail;
return NewChain;

if ((AllocatedBlocks * Block::Size) >= MaxMemory)
return nullptr;

// Align the pointer we'd like to return to an appropriate alignment, then
// advance the pointer from where to start allocations.
void *Result = AlignedNextBlock;
AlignedNextBlock = reinterpret_cast<void *>(
reinterpret_cast<char *>(AlignedNextBlock) + N);
++AllocatedBlocks;
return Result;
}

public:
Allocator(size_t M, size_t PreAllocate) : MaxMemory(M) {
// FIXME: Implement PreAllocate support!
}
explicit Allocator(size_t M)
: MaxMemory(nearest_boundary(M, kCacheLineSize)) {}

Block Allocate(uint64_t Alignment = 8) {
SpinMutexLock Lock(&Mutex);
// Check whether we're over quota.
if (Counter * Block::Size >= MaxMemory)
return {};

size_t ChainOffset = Counter % BlockLink::BlockPtrCount;

Block B{};
BlockLink *Link = Tail;
if (UNLIKELY(Counter == 0 || ChainOffset == 0))
Tail = Link = NewChainLink(Alignment);

B = Link->Blocks[ChainOffset];
++Counter;
return B;
}
Block Allocate() { return {Alloc()}; }

~Allocator() NOEXCEPT {
// We need to deallocate all the blocks, including the chain links.
for (auto *C = Tail; C != &NullLink;) {
// We know that the data block is a large contiguous page, we deallocate
// that at once.
InternalFree(C->BackingStore);
auto Prev = C->Prev;
InternalFree(C);
C = Prev;
if (BackingStore != nullptr) {
internal_munmap(BackingStore, MaxMemory);
}
}
}; // namespace __xray

// Storage for the NullLink sentinel.
template <size_t N> typename Allocator<N>::BlockLink Allocator<N>::NullLink;
};

} // namespace __xray
@@ -15,6 +15,7 @@
#ifndef XRAY_FUNCTION_CALL_TRIE_H
#define XRAY_FUNCTION_CALL_TRIE_H

#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "xray_profiling_flags.h"
#include "xray_segmented_array.h"
#include <memory> // For placement new.
@@ -118,9 +119,9 @@ public:

// We add a constructor here to allow us to inplace-construct through
// Array<...>'s AppendEmplace.
Node(Node *P, NodeIdPairAllocatorType &A, int64_t CC, int64_t CLT,
int32_t F)
: Parent(P), Callees(A), CallCount(CC), CumulativeLocalTime(CLT),
Node(Node *P, NodeIdPairAllocatorType &A, ChunkAllocator &CA, int64_t CC,
int64_t CLT, int32_t F)
: Parent(P), Callees(A, CA), CallCount(CC), CumulativeLocalTime(CLT),
FId(F) {}

// TODO: Include the compact histogram.
@@ -152,6 +153,7 @@ public:
RootAllocatorType *RootAllocator = nullptr;
ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
ChunkAllocator *ChunkAlloc = nullptr;

Allocators() {}
Allocators(const Allocators &) = delete;
@@ -160,11 +162,12 @@ public:
Allocators(Allocators &&O)
: NodeAllocator(O.NodeAllocator), RootAllocator(O.RootAllocator),
ShadowStackAllocator(O.ShadowStackAllocator),
NodeIdPairAllocator(O.NodeIdPairAllocator) {
NodeIdPairAllocator(O.NodeIdPairAllocator), ChunkAlloc(O.ChunkAlloc) {
O.NodeAllocator = nullptr;
O.RootAllocator = nullptr;
O.ShadowStackAllocator = nullptr;
O.NodeIdPairAllocator = nullptr;
O.ChunkAlloc = nullptr;
}

Allocators &operator=(Allocators &&O) {
@@ -188,6 +191,11 @@ public:
O.NodeIdPairAllocator = this->NodeIdPairAllocator;
this->NodeIdPairAllocator = Tmp;
}
{
auto Tmp = O.ChunkAlloc;
O.ChunkAlloc = this->ChunkAlloc;
this->ChunkAlloc = Tmp;
}
return *this;
}

@@ -198,18 +206,27 @@ public:
if (NodeAllocator != nullptr) {
NodeAllocator->~NodeAllocatorType();
InternalFree(NodeAllocator);
NodeAllocator = nullptr;
}
if (RootAllocator != nullptr) {
RootAllocator->~RootAllocatorType();
InternalFree(RootAllocator);
RootAllocator = nullptr;
}
if (ShadowStackAllocator != nullptr) {
ShadowStackAllocator->~ShadowStackAllocatorType();
InternalFree(ShadowStackAllocator);
ShadowStackAllocator = nullptr;
}
if (NodeIdPairAllocator != nullptr) {
NodeIdPairAllocator->~NodeIdPairAllocatorType();
InternalFree(NodeIdPairAllocator);
NodeIdPairAllocator = nullptr;
}
if (ChunkAlloc != nullptr) {
ChunkAlloc->~ChunkAllocator();
InternalFree(ChunkAlloc);
ChunkAlloc = nullptr;
}
}
};
@@ -223,24 +240,29 @@ public:
Allocators A;
auto NodeAllocator = reinterpret_cast<Allocators::NodeAllocatorType *>(
InternalAlloc(sizeof(Allocators::NodeAllocatorType)));
new (NodeAllocator) Allocators::NodeAllocatorType(Max, 0);
new (NodeAllocator) Allocators::NodeAllocatorType(Max);
A.NodeAllocator = NodeAllocator;

auto RootAllocator = reinterpret_cast<Allocators::RootAllocatorType *>(
InternalAlloc(sizeof(Allocators::RootAllocatorType)));
new (RootAllocator) Allocators::RootAllocatorType(Max, 0);
new (RootAllocator) Allocators::RootAllocatorType(Max);
A.RootAllocator = RootAllocator;

auto ShadowStackAllocator =
reinterpret_cast<Allocators::ShadowStackAllocatorType *>(
InternalAlloc(sizeof(Allocators::ShadowStackAllocatorType)));
new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(Max, 0);
new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(Max);
A.ShadowStackAllocator = ShadowStackAllocator;

auto NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
InternalAlloc(sizeof(NodeIdPairAllocatorType)));
new (NodeIdPairAllocator) NodeIdPairAllocatorType(Max, 0);
new (NodeIdPairAllocator) NodeIdPairAllocatorType(Max);
A.NodeIdPairAllocator = NodeIdPairAllocator;

auto ChunkAlloc = reinterpret_cast<ChunkAllocator *>(
InternalAlloc(sizeof(ChunkAllocator)));
new (ChunkAlloc) ChunkAllocator(Max);
A.ChunkAlloc = ChunkAlloc;
return A;
}

@@ -249,20 +271,22 @@ private:
RootArray Roots;
ShadowStackArray ShadowStack;
NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
ChunkAllocator *ChunkAlloc = nullptr;

public:
explicit FunctionCallTrie(const Allocators &A)
: Nodes(*A.NodeAllocator), Roots(*A.RootAllocator),
ShadowStack(*A.ShadowStackAllocator),
NodeIdPairAllocator(A.NodeIdPairAllocator) {}
: Nodes(*A.NodeAllocator, *A.ChunkAlloc),
Roots(*A.RootAllocator, *A.ChunkAlloc),
ShadowStack(*A.ShadowStackAllocator, *A.ChunkAlloc),
NodeIdPairAllocator(A.NodeIdPairAllocator), ChunkAlloc(A.ChunkAlloc) {}

void enterFunction(const int32_t FId, uint64_t TSC) {
DCHECK_NE(FId, 0);
// This function primarily deals with ensuring that the ShadowStack is
// consistent and ready for when an exit event is encountered.
if (UNLIKELY(ShadowStack.empty())) {
auto NewRoot =
Nodes.AppendEmplace(nullptr, *NodeIdPairAllocator, 0, 0, FId);
auto NewRoot = Nodes.AppendEmplace(nullptr, *NodeIdPairAllocator,
*ChunkAlloc, 0, 0, FId);
if (UNLIKELY(NewRoot == nullptr))
return;
Roots.Append(NewRoot);
@@ -285,8 +309,8 @@ public:
}

// This means we've never seen this stack before, create a new node here.
auto NewNode =
Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator, 0, 0, FId);
auto NewNode = Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator,
*ChunkAlloc, 0, 0, FId);
if (UNLIKELY(NewNode == nullptr))
return;
DCHECK_NE(NewNode, nullptr);
@@ -345,13 +369,14 @@ public:
using Stack = Array<NodeAndParent>;

typename Stack::AllocatorType StackAllocator(
profilingFlags()->stack_allocator_max, 0);
Stack DFSStack(StackAllocator);
profilingFlags()->stack_allocator_max);
ChunkAllocator StackChunkAllocator(profilingFlags()->stack_allocator_max);
Stack DFSStack(StackAllocator, StackChunkAllocator);

for (const auto Root : getRoots()) {
// Add a node in O for this root.
auto NewRoot = O.Nodes.AppendEmplace(
nullptr, *O.NodeIdPairAllocator, Root->CallCount,
nullptr, *O.NodeIdPairAllocator, *O.ChunkAlloc, Root->CallCount,
Root->CumulativeLocalTime, Root->FId);

// Because we cannot allocate more memory we should bail out right away.
@@ -370,8 +395,9 @@ public:
DFSStack.trim(1);
for (const auto Callee : NP.Node->Callees) {
auto NewNode = O.Nodes.AppendEmplace(
NP.NewNode, *O.NodeIdPairAllocator, Callee.NodePtr->CallCount,
Callee.NodePtr->CumulativeLocalTime, Callee.FId);
NP.NewNode, *O.NodeIdPairAllocator, *O.ChunkAlloc,
Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime,
Callee.FId);
if (UNLIKELY(NewNode == nullptr))
return;
NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId);
@@ -396,16 +422,17 @@ public:
};
using Stack = Array<NodeAndTarget>;
typename Stack::AllocatorType StackAllocator(
profilingFlags()->stack_allocator_max, 0);
Stack DFSStack(StackAllocator);
profilingFlags()->stack_allocator_max);
ChunkAllocator CA(profilingFlags()->stack_allocator_max);
Stack DFSStack(StackAllocator, CA);

for (const auto Root : getRoots()) {
Node *TargetRoot = nullptr;
auto R = O.Roots.find_element(
[&](const Node *Node) { return Node->FId == Root->FId; });
if (R == nullptr) {
TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator, 0,
0, Root->FId);
TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator,
*O.ChunkAlloc, 0, 0, Root->FId);
if (UNLIKELY(TargetRoot == nullptr))
return;

@@ -429,8 +456,9 @@ public:
return C.FId == Callee.FId;
});
if (TargetCallee == nullptr) {
auto NewTargetNode = O.Nodes.AppendEmplace(
NT.TargetNode, *O.NodeIdPairAllocator, 0, 0, Callee.FId);
auto NewTargetNode =
O.Nodes.AppendEmplace(NT.TargetNode, *O.NodeIdPairAllocator,
*O.ChunkAlloc, 0, 0, Callee.FId);

if (UNLIKELY(NewTargetNode == nullptr))
return;
@@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
#include "xray_profile_collector.h"
#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_vector.h"
#include "xray_profiling_flags.h"
@@ -117,11 +118,12 @@ struct ProfileRecord {
const FunctionCallTrie::Node *Node = nullptr;

// Constructor for in-place construction.
ProfileRecord(PathAllocator &A, const FunctionCallTrie::Node *N)
ProfileRecord(PathAllocator &A, ChunkAllocator &CA,
const FunctionCallTrie::Node *N)
: Path([&] {
auto P =
reinterpret_cast<PathArray *>(InternalAlloc(sizeof(PathArray)));
new (P) PathArray(A);
new (P) PathArray(A, CA);
return P;
}()),
Node(N) {}
@@ -135,17 +137,20 @@ using ProfileRecordArray = Array<ProfileRecord>;
// the path(s) and the data associated with the path.
static void populateRecords(ProfileRecordArray &PRs,
ProfileRecord::PathAllocator &PA,
const FunctionCallTrie &Trie) {
ChunkAllocator &CA, const FunctionCallTrie &Trie) {
using StackArray = Array<const FunctionCallTrie::Node *>;
using StackAllocator = typename StackArray::AllocatorType;
StackAllocator StackAlloc(profilingFlags()->stack_allocator_max, 0);
StackArray DFSStack(StackAlloc);
StackAllocator StackAlloc(profilingFlags()->stack_allocator_max);
ChunkAllocator StackChunkAlloc(profilingFlags()->stack_allocator_max);
StackArray DFSStack(StackAlloc, StackChunkAlloc);
for (const auto R : Trie.getRoots()) {
DFSStack.Append(R);
while (!DFSStack.empty()) {
auto Node = DFSStack.back();
DFSStack.trim(1);
auto Record = PRs.AppendEmplace(PA, Node);
auto Record = PRs.AppendEmplace(PA, CA, Node);
if (Record == nullptr)
return;
DCHECK_NE(Record, nullptr);

// Traverse the Node's parents and as we're doing so, get the FIds in
@@ -208,10 +213,11 @@ void serialize() {
// Then repopulate the global ProfileBuffers.
for (u32 I = 0; I < ThreadTries->Size(); ++I) {
using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType;
ProfileRecordAllocator PRAlloc(profilingFlags()->global_allocator_max, 0);
ProfileRecordAllocator PRAlloc(profilingFlags()->global_allocator_max);
ChunkAllocator CA(profilingFlags()->global_allocator_max);
ProfileRecord::PathAllocator PathAlloc(
profilingFlags()->global_allocator_max, 0);
ProfileRecordArray ProfileRecords(PRAlloc);
profilingFlags()->global_allocator_max);
ProfileRecordArray ProfileRecords(PRAlloc, CA);

// First, we want to compute the amount of space we're going to need. We'll
// use a local allocator and an __xray::Array<...> to store the intermediary
@@ -220,7 +226,7 @@ void serialize() {
const auto &Trie = *(*ThreadTries)[I].Trie;
if (Trie.getRoots().empty())
continue;
populateRecords(ProfileRecords, PathAlloc, Trie);
populateRecords(ProfileRecords, PathAlloc, CA, Trie);
DCHECK(!Trie.getRoots().empty());
DCHECK(!ProfileRecords.empty());
@@ -56,8 +56,9 @@ struct alignas(64) ProfilingData {

static pthread_key_t ProfilingKey;

ProfilingData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
thread_local std::aligned_storage<sizeof(ProfilingData)>::type ThreadStorage;
thread_local std::aligned_storage<sizeof(ProfilingData)>::type ThreadStorage{};

static ProfilingData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
if (pthread_getspecific(ProfilingKey) == NULL) {
new (&ThreadStorage) ProfilingData{};
pthread_setspecific(ProfilingKey, &ThreadStorage);
@@ -86,6 +87,18 @@ ProfilingData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
return TLD;
}

static void cleanupTLD() XRAY_NEVER_INSTRUMENT {
auto &TLD = *reinterpret_cast<ProfilingData *>(&ThreadStorage);
if (TLD.Allocators != nullptr && TLD.FCT != nullptr) {
TLD.FCT->~FunctionCallTrie();
TLD.Allocators->~Allocators();
InternalFree(TLD.FCT);
InternalFree(TLD.Allocators);
TLD.FCT = nullptr;
TLD.Allocators = nullptr;
}
}

} // namespace

const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
@@ -148,6 +161,9 @@ XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {

profileCollectorService::reset();

// Flush the current thread's local data structures as well.
cleanupTLD();

atomic_store(&ProfilerLogStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
memory_order_release);

@@ -158,17 +174,12 @@ namespace {

thread_local atomic_uint8_t ReentranceGuard{0};

void postCurrentThreadFCT(ProfilingData &TLD) {
static void postCurrentThreadFCT(ProfilingData &TLD) {
if (TLD.Allocators == nullptr || TLD.FCT == nullptr)
return;

profileCollectorService::post(*TLD.FCT, GetTid());
TLD.FCT->~FunctionCallTrie();
TLD.Allocators->~Allocators();
InternalFree(TLD.FCT);
InternalFree(TLD.Allocators);
TLD.FCT = nullptr;
TLD.Allocators = nullptr;
cleanupTLD();
}

} // namespace
@@ -294,10 +305,14 @@ profilingLoggingInit(size_t BufferSize, size_t BufferMax, void *Options,
// ABI functions for registering exit handlers.
Atexit(+[] {
// Finalize and flush.
if (profilingFinalize() != XRAY_LOG_FINALIZED)
if (profilingFinalize() != XRAY_LOG_FINALIZED) {
cleanupTLD();
return;
if (profilingFlush() != XRAY_LOG_FLUSHED)
}
if (profilingFlush() != XRAY_LOG_FLUSHED) {
cleanupTLD();
return;
}
if (Verbosity())
Report("XRay Profile flushed at exit.");
});
@@ -18,7 +18,7 @@ XRAY_FLAG(uptr, per_thread_allocator_max, 2 << 20,
"Maximum size of any single per-thread allocator.")
XRAY_FLAG(uptr, global_allocator_max, 2 << 24,
"Maximum size of the global allocator for profile storage.")
XRAY_FLAG(uptr, stack_allocator_max, 2 << 24,
XRAY_FLAG(uptr, stack_allocator_max, 2 << 20,
"Maximum size of the traversal stack allocator.")
XRAY_FLAG(int, grace_period_ms, 1,
"Profile collection will wait this much time in milliseconds before "
@@ -24,6 +24,13 @@
#include <utility>

namespace __xray {
struct Chunk {
void *Data;
Chunk *Prev;
Chunk *Next;
};

using ChunkAllocator = Allocator<next_pow2(sizeof(Chunk)) * 8>;

/// The Array type provides an interface similar to std::vector<...> but does
/// not shrink in size. Once constructed, elements can be appended but cannot be
@@ -51,16 +58,10 @@ private:
// TODO: Consider co-locating the chunk information with the data in the
// Block, as in an intrusive list -- i.e. putting the next and previous
// pointer values inside the Block storage.
struct Chunk {
typename AllocatorType::Block Block;
static constexpr size_t Size = N;
Chunk *Prev = this;
Chunk *Next = this;
};

static Chunk SentinelChunk;

AllocatorType *Alloc;
ChunkAllocator *ChunkAlloc;
Chunk *Head = &SentinelChunk;
Chunk *Tail = &SentinelChunk;
size_t Size = 0;
@@ -82,32 +83,21 @@ private:
return FreeChunk;
}

auto Block = Alloc->Allocate(ElementStorageSize);
auto Block = Alloc->Allocate();
if (Block.Data == nullptr)
return nullptr;

// TODO: Maybe use a separate managed allocator for Chunk instances?
auto C = reinterpret_cast<Chunk *>(
InternalAlloc(sizeof(Chunk), nullptr, kCacheLineSize));
if (C == nullptr)
auto ChunkElement = ChunkAlloc->Allocate();
if (ChunkElement.Data == nullptr)
return nullptr;
C->Block = Block;
C->Prev = &SentinelChunk;
C->Next = &SentinelChunk;

// Placement-new the Chunk element at the appropriate location.
auto C = reinterpret_cast<Chunk *>(ChunkElement.Data);
Chunk LocalC{Block.Data, &SentinelChunk, &SentinelChunk};
internal_memcpy(C, &LocalC, sizeof(LocalC));
return C;
}

static AllocatorType &GetGlobalAllocator() {
static AllocatorType *const GlobalAllocator = [] {
AllocatorType *A = reinterpret_cast<AllocatorType *>(
InternalAlloc(sizeof(AllocatorType)));
new (A) AllocatorType(2 << 10, 0);
return A;
}();

return *GlobalAllocator;
}

Chunk *InitHeadAndTail() {
DCHECK_EQ(Head, &SentinelChunk);
DCHECK_EQ(Tail, &SentinelChunk);
@@ -201,21 +191,21 @@ private:
U &operator*() const {
DCHECK_NE(C, &SentinelChunk);
auto RelOff = Offset % N;
return *reinterpret_cast<U *>(reinterpret_cast<char *>(C->Block.Data) +
return *reinterpret_cast<U *>(reinterpret_cast<char *>(C->Data) +
(RelOff * ElementStorageSize));
}

U *operator->() const {
DCHECK_NE(C, &SentinelChunk);
auto RelOff = Offset % N;
return reinterpret_cast<U *>(reinterpret_cast<char *>(C->Block.Data) +
return reinterpret_cast<U *>(reinterpret_cast<char *>(C->Data) +
(RelOff * ElementStorageSize));
}
};

public:
explicit Array(AllocatorType &A) : Alloc(&A) {}
Array() : Array(GetGlobalAllocator()) {}
explicit Array(AllocatorType &A, ChunkAllocator &CA)
: Alloc(&A), ChunkAlloc(&CA) {}

Array(const Array &) = delete;
Array(Array &&O) NOEXCEPT : Alloc(O.Alloc),
@@ -246,9 +236,8 @@ public:
if (AppendNewChunk() == nullptr)
return nullptr;

auto Position =
reinterpret_cast<T *>(reinterpret_cast<char *>(Tail->Block.Data) +
(Offset * ElementStorageSize));
auto Position = reinterpret_cast<T *>(reinterpret_cast<char *>(Tail->Data) +
(Offset * ElementStorageSize));
*Position = E;
++Size;
return Position;
@@ -268,7 +257,7 @@ public:
}

DCHECK_NE(Tail, &SentinelChunk);
auto Position = reinterpret_cast<char *>(LatestChunk->Block.Data) +
auto Position = reinterpret_cast<char *>(LatestChunk->Data) +
(Offset * ElementStorageSize);
DCHECK_EQ(reinterpret_cast<uintptr_t>(Position) % ElementStorageSize, 0);
// In-place construct at Position.
@@ -286,7 +275,7 @@ public:
Offset -= N;
DCHECK_NE(C, &SentinelChunk);
}
return *reinterpret_cast<T *>(reinterpret_cast<char *>(C->Block.Data) +
return *reinterpret_cast<T *>(reinterpret_cast<char *>(C->Data) +
(Offset * ElementStorageSize));
}

@@ -360,7 +349,8 @@ public:
// ensure that storage for the SentinelChunk is defined and has a single
// address.
template <class T, size_t N>
typename Array<T, N>::Chunk Array<T, N>::SentinelChunk;
Chunk Array<T, N>::SentinelChunk{nullptr, &Array<T, N>::SentinelChunk,
&Array<T, N>::SentinelChunk};

} // namespace __xray
@@ -15,6 +15,8 @@
#ifndef XRAY_UTILS_H
#define XRAY_UTILS_H

#include <cstddef>
#include <cstdint>
#include <sys/types.h>
#include <utility>

@@ -54,6 +56,14 @@ constexpr size_t next_pow2(size_t number) {
return next_pow2_helper(number, 1);
}

template <class T> constexpr T &max(T &A, T &B) { return A > B ? A : B; }

template <class T> constexpr T &min(T &A, T &B) { return A <= B ? A : B; }

constexpr ptrdiff_t diff(uintptr_t A, uintptr_t B) {
return max(A, B) - min(A, B);
}

} // namespace __xray

#endif // XRAY_UTILS_H