[scudo][standalone] Shift some data from dynamic to static
Summary:
Most of our larger data is dynamically allocated (via `map`), but this became a hindrance with regard to init time, for a cost-to-benefit ratio that is not great. So change the `TSD`s, `RegionInfo` and `ByteMap` to be static.

Additionally, for reclaiming we used to map and unmap a buffer each time, which is costly. It turns out that we can use a static buffer, and that there isn't much contention on it.

One other change here is that we hard-set the number of TSDs on Android to the maximum, as cores can be put to sleep and we could otherwise miss some.

Subscribers: mgorny, #sanitizers, llvm-commits

Tags: #sanitizers, #llvm

Differential Revision: https://reviews.llvm.org/D74696
parent 7603bfb4b0
commit fc69967a4b
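The reclaiming part of the change boils down to a small pattern: keep a statically sized buffer behind a mutex, take it with a non-blocking tryLock when the request fits, and fall back to allocating fresh memory otherwise. The sketch below shows that pattern in isolation; it assumes standard-library stand-ins (std::mutex, std::calloc) and hypothetical helper names (acquireBuffer, releaseBuffer), whereas the actual diff below uses scudo's HybridMutex and map()/unmap() in the PackedCounterArray changes to release.h.

#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <mutex>

// Illustrative stand-ins only: scudo uses its own HybridMutex and map()/unmap()
// primitives; std::mutex and std::calloc/std::free keep this sketch
// self-contained.
namespace {
constexpr std::size_t StaticBufferSize = 1024;
std::mutex BufferMutex;
unsigned long StaticBuffer[StaticBufferSize];
} // namespace

// Grab the static buffer when it is big enough and currently free; otherwise
// fall back to a dynamic allocation. UsedStatic tells the caller which path
// was taken so the release is symmetric.
unsigned long *acquireBuffer(std::size_t Count, bool *UsedStatic) {
  if (Count <= StaticBufferSize && BufferMutex.try_lock()) {
    *UsedStatic = true;
    std::memset(StaticBuffer, 0, Count * sizeof(StaticBuffer[0]));
    return StaticBuffer;
  }
  *UsedStatic = false; // Contended or too large: pay for a fresh allocation.
  return static_cast<unsigned long *>(
      std::calloc(Count, sizeof(unsigned long)));
}

void releaseBuffer(unsigned long *Buffer, bool UsedStatic) {
  if (UsedStatic)
    BufferMutex.unlock(); // Static path: just hand the buffer back.
  else
    std::free(Buffer); // Dynamic path: release the memory.
}

int main() {
  bool UsedStatic = false;
  unsigned long *Counters = acquireBuffer(256, &UsedStatic);
  if (Counters)
    Counters[0] = 1; // ... page-release accounting would happen here ...
  releaseBuffer(Counters, UsedStatic);
  return 0;
}

The non-blocking tryLock is what keeps the fallback cheap: a contended caller never waits, it simply allocates its own buffer, which matches the summary's observation that there isn't much contention on the static buffer.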
@@ -88,6 +88,7 @@ set(SCUDO_SOURCES
   flags_parser.cpp
   fuchsia.cpp
   linux.cpp
+  release.cpp
   report.cpp
   string_utils.cpp
   )
@@ -17,12 +17,10 @@ namespace scudo {

 template <uptr Size> class FlatByteMap {
 public:
-  void initLinkerInitialized() {
-    Map = reinterpret_cast<u8 *>(map(nullptr, Size, "scudo:bytemap"));
-  }
-  void init() { initLinkerInitialized(); }
+  void initLinkerInitialized() {}
+  void init() { memset(Map, 0, sizeof(Map)); }

-  void unmapTestOnly() { unmap(reinterpret_cast<void *>(Map), Size); }
+  void unmapTestOnly() {}

   void set(uptr Index, u8 Value) {
     DCHECK_LT(Index, Size);
@@ -38,7 +36,7 @@ public:
   void enable() {}

 private:
-  u8 *Map;
+  u8 Map[Size];
 };

 } // namespace scudo
@@ -40,7 +40,8 @@ namespace scudo {

 template <class SizeClassMapT, uptr RegionSizeLog,
           s32 MinReleaseToOsIntervalMs = INT32_MIN,
-          s32 MaxReleaseToOsIntervalMs = INT32_MAX> class SizeClassAllocator32 {
+          s32 MaxReleaseToOsIntervalMs = INT32_MAX>
+class SizeClassAllocator32 {
 public:
   typedef SizeClassMapT SizeClassMap;
   // The bytemap can only track UINT8_MAX - 1 classes.
@@ -49,7 +50,8 @@ public:
   static_assert((1UL << RegionSizeLog) >= SizeClassMap::MaxSize, "");
   typedef SizeClassAllocator32<SizeClassMapT, RegionSizeLog,
                                MinReleaseToOsIntervalMs,
-                               MaxReleaseToOsIntervalMs> ThisT;
+                               MaxReleaseToOsIntervalMs>
+      ThisT;
   typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
   typedef typename CacheT::TransferBatch TransferBatch;
   static const bool SupportsMemoryTagging = false;
@@ -46,10 +46,9 @@ template <class SizeClassMapT, uptr RegionSizeLog,
 class SizeClassAllocator64 {
 public:
   typedef SizeClassMapT SizeClassMap;
-  typedef SizeClassAllocator64<SizeClassMap, RegionSizeLog,
-                               MinReleaseToOsIntervalMs,
-                               MaxReleaseToOsIntervalMs,
-                               MaySupportMemoryTagging>
+  typedef SizeClassAllocator64<
+      SizeClassMap, RegionSizeLog, MinReleaseToOsIntervalMs,
+      MaxReleaseToOsIntervalMs, MaySupportMemoryTagging>
       ThisT;
   typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
   typedef typename CacheT::TransferBatch TransferBatch;
@@ -69,11 +68,6 @@ public:
     PrimaryBase = reinterpret_cast<uptr>(
         map(nullptr, PrimarySize, "scudo:primary", MAP_NOACCESS, &Data));

-    RegionInfoArray = reinterpret_cast<RegionInfo *>(
-        map(nullptr, sizeof(RegionInfo) * NumClasses, "scudo:regioninfo"));
-    DCHECK_EQ(reinterpret_cast<uptr>(RegionInfoArray) % SCUDO_CACHE_LINE_SIZE,
-              0);
-
     u32 Seed;
     if (UNLIKELY(!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))))
       Seed = static_cast<u32>(getMonotonicTime() ^ (PrimaryBase >> 12));
@@ -106,8 +100,6 @@ public:

   void unmapTestOnly() {
     unmap(reinterpret_cast<void *>(PrimaryBase), PrimarySize, UNMAP_ALL, &Data);
-    unmap(reinterpret_cast<void *>(RegionInfoArray),
-          sizeof(RegionInfo) * NumClasses);
   }

   TransferBatch *popBatch(CacheT *C, uptr ClassId) {
@@ -156,7 +148,7 @@ public:
     }
   }

-  template <typename F> void iterateOverBlocks(F Callback) const {
+  template <typename F> void iterateOverBlocks(F Callback) {
     for (uptr I = 0; I < NumClasses; I++) {
       if (I == SizeClassMap::BatchClassId)
         continue;
@@ -169,7 +161,7 @@ public:
     }
   }

-  void getStats(ScopedString *Str) const {
+  void getStats(ScopedString *Str) {
     // TODO(kostyak): get the RSS per region.
     uptr TotalMapped = 0;
     uptr PoppedBlocks = 0;
@@ -252,12 +244,12 @@ private:
   static_assert(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0, "");

   uptr PrimaryBase;
-  RegionInfo *RegionInfoArray;
   MapPlatformData Data;
   atomic_s32 ReleaseToOsIntervalMs;
   bool UseMemoryTagging;
+  RegionInfo RegionInfoArray[NumClasses];

-  RegionInfo *getRegionInfo(uptr ClassId) const {
+  RegionInfo *getRegionInfo(uptr ClassId) {
     DCHECK_LT(ClassId, NumClasses);
     return &RegionInfoArray[ClassId];
   }
@@ -371,7 +363,7 @@ private:
     return B;
   }

-  void getStats(ScopedString *Str, uptr ClassId, uptr Rss) const {
+  void getStats(ScopedString *Str, uptr ClassId, uptr Rss) {
     RegionInfo *Region = getRegionInfo(ClassId);
     if (Region->MappedUser == 0)
       return;
@@ -0,0 +1,16 @@
+//===-- release.cpp ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "release.h"
+
+namespace scudo {
+
+HybridMutex PackedCounterArray::Mutex = {};
+uptr PackedCounterArray::StaticBuffer[1024];
+
+} // namespace scudo
@@ -11,6 +11,7 @@

 #include "common.h"
 #include "list.h"
+#include "mutex.h"

 namespace scudo {

@@ -39,11 +40,13 @@ private:
 };

 // A packed array of Counters. Each counter occupies 2^N bits, enough to store
-// counter's MaxValue. Ctor will try to allocate the required Buffer via map()
-// and the caller is expected to check whether the initialization was successful
-// by checking isAllocated() result. For the performance sake, none of the
-// accessors check the validity of the arguments, It is assumed that Index is
-// always in [0, N) range and the value is not incremented past MaxValue.
+// counter's MaxValue. Ctor will try to use a static buffer first, and if that
+// fails (the buffer is too small or already locked), will allocate the
+// required Buffer via map(). The caller is expected to check whether the
+// initialization was successful by checking isAllocated() result. For
+// performance sake, none of the accessors check the validity of the arguments,
+// It is assumed that Index is always in [0, N) range and the value is not
+// incremented past MaxValue.
 class PackedCounterArray {
 public:
   PackedCounterArray(uptr NumCounters, uptr MaxValue) : N(NumCounters) {
@@ -66,11 +69,20 @@ public:
     BufferSize = (roundUpTo(N, static_cast<uptr>(1U) << PackingRatioLog) >>
                   PackingRatioLog) *
                  sizeof(*Buffer);
-    Buffer = reinterpret_cast<uptr *>(
-        map(nullptr, BufferSize, "scudo:counters", MAP_ALLOWNOMEM));
+    if (BufferSize <= StaticBufferSize && Mutex.tryLock()) {
+      Buffer = &StaticBuffer[0];
+      memset(Buffer, 0, BufferSize);
+    } else {
+      Buffer = reinterpret_cast<uptr *>(
+          map(nullptr, BufferSize, "scudo:counters", MAP_ALLOWNOMEM));
+    }
   }
   ~PackedCounterArray() {
-    if (isAllocated())
+    if (!isAllocated())
+      return;
+    if (Buffer == &StaticBuffer[0])
+      Mutex.unlock();
+    else
       unmap(reinterpret_cast<void *>(Buffer), BufferSize);
   }

@@ -110,6 +122,10 @@ private:

   uptr BufferSize;
   uptr *Buffer;
+
+  static HybridMutex Mutex;
+  static const uptr StaticBufferSize = 1024U;
+  static uptr StaticBuffer[StaticBufferSize];
 };

 template <class ReleaseRecorderT> class FreePagesRangeTracker {
@@ -25,9 +25,7 @@ template <class Allocator> struct TSDRegistryExT {
   void initLinkerInitialized(Allocator *Instance) {
     Instance->initLinkerInitialized();
     CHECK_EQ(pthread_key_create(&PThreadKey, teardownThread<Allocator>), 0);
-    FallbackTSD = reinterpret_cast<TSD<Allocator> *>(
-        map(nullptr, sizeof(TSD<Allocator>), "scudo:tsd"));
-    FallbackTSD->initLinkerInitialized(Instance);
+    FallbackTSD.initLinkerInitialized(Instance);
     Initialized = true;
   }
   void init(Allocator *Instance) {
@@ -35,9 +33,7 @@ template <class Allocator> struct TSDRegistryExT {
     initLinkerInitialized(Instance);
   }

-  void unmapTestOnly() {
-    unmap(reinterpret_cast<void *>(FallbackTSD), sizeof(TSD<Allocator>));
-  }
+  void unmapTestOnly() {}

   ALWAYS_INLINE void initThreadMaybe(Allocator *Instance, bool MinimalInit) {
     if (LIKELY(State != ThreadState::NotInitialized))
@@ -51,23 +47,22 @@ template <class Allocator> struct TSDRegistryExT {
       *UnlockRequired = false;
       return &ThreadTSD;
     }
-    DCHECK(FallbackTSD);
-    FallbackTSD->lock();
+    FallbackTSD.lock();
     *UnlockRequired = true;
-    return FallbackTSD;
+    return &FallbackTSD;
   }

   // To disable the exclusive TSD registry, we effectively lock the fallback TSD
   // and force all threads to attempt to use it instead of their local one.
   void disable() {
     Mutex.lock();
-    FallbackTSD->lock();
+    FallbackTSD.lock();
     atomic_store(&Disabled, 1U, memory_order_release);
   }

   void enable() {
     atomic_store(&Disabled, 0U, memory_order_release);
-    FallbackTSD->unlock();
+    FallbackTSD.unlock();
     Mutex.unlock();
   }

@@ -96,7 +91,7 @@ private:
   pthread_key_t PThreadKey;
   bool Initialized;
   atomic_u8 Disabled;
-  TSD<Allocator> *FallbackTSD;
+  TSD<Allocator> FallbackTSD;
   HybridMutex Mutex;
   static THREADLOCAL ThreadState State;
   static THREADLOCAL TSD<Allocator> ThreadTSD;
@@ -19,10 +19,9 @@ template <class Allocator, u32 MaxTSDCount> struct TSDRegistrySharedT {
     Instance->initLinkerInitialized();
     CHECK_EQ(pthread_key_create(&PThreadKey, nullptr), 0); // For non-TLS
     const u32 NumberOfCPUs = getNumberOfCPUs();
-    NumberOfTSDs =
-        (NumberOfCPUs == 0) ? MaxTSDCount : Min(NumberOfCPUs, MaxTSDCount);
-    TSDs = reinterpret_cast<TSD<Allocator> *>(
-        map(nullptr, sizeof(TSD<Allocator>) * NumberOfTSDs, "scudo:tsd"));
+    NumberOfTSDs = (SCUDO_ANDROID || NumberOfCPUs == 0)
+                       ? MaxTSDCount
+                       : Min(NumberOfCPUs, MaxTSDCount);
     for (u32 I = 0; I < NumberOfTSDs; I++)
       TSDs[I].initLinkerInitialized(Instance);
     // Compute all the coprimes of NumberOfTSDs. This will be used to walk the
@@ -48,8 +47,6 @@ template <class Allocator, u32 MaxTSDCount> struct TSDRegistrySharedT {
   }

   void unmapTestOnly() {
-    unmap(reinterpret_cast<void *>(TSDs),
-          sizeof(TSD<Allocator>) * NumberOfTSDs);
     setCurrentTSD(nullptr);
     pthread_key_delete(PThreadKey);
   }
@@ -162,11 +159,11 @@ private:
   pthread_key_t PThreadKey;
   atomic_u32 CurrentIndex;
   u32 NumberOfTSDs;
-  TSD<Allocator> *TSDs;
   u32 NumberOfCoPrimes;
   u32 CoPrimes[MaxTSDCount];
   bool Initialized;
   HybridMutex Mutex;
+  TSD<Allocator> TSDs[MaxTSDCount];
 #if SCUDO_LINUX && !_BIONIC
   static THREADLOCAL TSD<Allocator> *ThreadTSD;
 #endif
@@ -195,7 +195,7 @@ INTERFACE WEAK int SCUDO_PREFIX(malloc_info)(UNUSED int options, FILE *stream) {
       decltype(SCUDO_ALLOCATOR)::PrimaryT::SizeClassMap::MaxSize;
   auto *sizes = static_cast<scudo::uptr *>(
       SCUDO_PREFIX(calloc)(max_size, sizeof(scudo::uptr)));
-  auto callback = [](uintptr_t, size_t size, void* arg) {
+  auto callback = [](uintptr_t, size_t size, void *arg) {
     auto *sizes = reinterpret_cast<scudo::uptr *>(arg);
     if (size < max_size)
       sizes[size]++;