forked from OSchip/llvm-project
[scudo][standalone] Minor optimization & improvements
Summary: A few small improvements and optimizations: - when refilling the free list, push back the last batch and return the front one: this allows us to keep allocations towards the front of the region; - instead of using 48 entries in the shuffle array, use a multiple of `MaxNumCached`; - make the maximum number of batches to create on refill a constant; ultimately it should be configurable, but that's for later; - `initCache` doesn't need to zero out the cache, it's already done. - it turns out that when using `||` or `&&`, the compiler is adamant about adding a short circuit for every part of the expression, which ends up producing somewhat annoying asm with lots of tests and conditional jumps. I am changing that to bitwise `|` or `&` in two places so that the generated code looks better. Added comments since it might feel weird to people. This yields some small performance gains overall, nothing drastic though. Reviewers: hctim, morehouse, cferris, eugenis Subscribers: #sanitizers, llvm-commits Tags: #sanitizers, #llvm Differential Revision: https://reviews.llvm.org/D70452
This commit is contained in:
parent
bb775bee21
commit
46240c3872
|
@ -144,7 +144,10 @@ public:
|
||||||
|
|
||||||
TSDRegistryT *getTSDRegistry() { return &TSDRegistry; }
|
TSDRegistryT *getTSDRegistry() { return &TSDRegistry; }
|
||||||
|
|
||||||
void initCache(CacheT *Cache) { Cache->init(&Stats, &Primary); }
|
// The Cache must be provided zero-initialized.
|
||||||
|
void initCache(CacheT *Cache) {
|
||||||
|
Cache->initLinkerInitialized(&Stats, &Primary);
|
||||||
|
}
|
||||||
|
|
||||||
// Release the resources used by a TSD, which involves:
|
// Release the resources used by a TSD, which involves:
|
||||||
// - draining the local quarantine cache to the global quarantine;
|
// - draining the local quarantine cache to the global quarantine;
|
||||||
|
@ -161,7 +164,7 @@ public:
|
||||||
uptr Alignment = MinAlignment,
|
uptr Alignment = MinAlignment,
|
||||||
bool ZeroContents = false) {
|
bool ZeroContents = false) {
|
||||||
initThreadMaybe();
|
initThreadMaybe();
|
||||||
ZeroContents = ZeroContents || Options.ZeroContents;
|
ZeroContents |= static_cast<bool>(Options.ZeroContents);
|
||||||
|
|
||||||
if (UNLIKELY(Alignment > MaxAlignment)) {
|
if (UNLIKELY(Alignment > MaxAlignment)) {
|
||||||
if (Options.MayReturnNull)
|
if (Options.MayReturnNull)
|
||||||
|
@ -181,12 +184,13 @@ public:
|
||||||
((Alignment > MinAlignment) ? Alignment : Chunk::getHeaderSize());
|
((Alignment > MinAlignment) ? Alignment : Chunk::getHeaderSize());
|
||||||
|
|
||||||
// Takes care of extravagantly large sizes as well as integer overflows.
|
// Takes care of extravagantly large sizes as well as integer overflows.
|
||||||
if (UNLIKELY(Size >= MaxAllowedMallocSize ||
|
COMPILER_CHECK(MaxAllowedMallocSize < UINTPTR_MAX - MaxAlignment);
|
||||||
NeededSize >= MaxAllowedMallocSize)) {
|
if (UNLIKELY(Size >= MaxAllowedMallocSize)) {
|
||||||
if (Options.MayReturnNull)
|
if (Options.MayReturnNull)
|
||||||
return nullptr;
|
return nullptr;
|
||||||
reportAllocationSizeTooBig(Size, NeededSize, MaxAllowedMallocSize);
|
reportAllocationSizeTooBig(Size, NeededSize, MaxAllowedMallocSize);
|
||||||
}
|
}
|
||||||
|
DCHECK_LE(Size, NeededSize);
|
||||||
|
|
||||||
void *Block;
|
void *Block;
|
||||||
uptr ClassId;
|
uptr ClassId;
|
||||||
|
@ -541,7 +545,9 @@ private:
|
||||||
Chunk::UnpackedHeader NewHeader = *Header;
|
Chunk::UnpackedHeader NewHeader = *Header;
|
||||||
// If the quarantine is disabled, the actual size of a chunk is 0 or larger
|
// If the quarantine is disabled, the actual size of a chunk is 0 or larger
|
||||||
// than the maximum allowed, we return a chunk directly to the backend.
|
// than the maximum allowed, we return a chunk directly to the backend.
|
||||||
const bool BypassQuarantine = !Quarantine.getCacheSize() || !Size ||
|
// Logical Or can be short-circuited, which introduces unnecessary
|
||||||
|
// conditional jumps, so use bitwise Or and let the compiler be clever.
|
||||||
|
const bool BypassQuarantine = !Quarantine.getCacheSize() | !Size |
|
||||||
(Size > Options.QuarantineMaxChunkSize);
|
(Size > Options.QuarantineMaxChunkSize);
|
||||||
if (BypassQuarantine) {
|
if (BypassQuarantine) {
|
||||||
NewHeader.State = Chunk::State::Available;
|
NewHeader.State = Chunk::State::Available;
|
||||||
|
|
|
@ -300,10 +300,10 @@ private:
|
||||||
const uptr NumberOfBlocks = RegionSize / Size;
|
const uptr NumberOfBlocks = RegionSize / Size;
|
||||||
DCHECK_GT(NumberOfBlocks, 0);
|
DCHECK_GT(NumberOfBlocks, 0);
|
||||||
TransferBatch *B = nullptr;
|
TransferBatch *B = nullptr;
|
||||||
constexpr uptr ShuffleArraySize = 48;
|
constexpr u32 ShuffleArraySize = 8U * TransferBatch::MaxNumCached;
|
||||||
void *ShuffleArray[ShuffleArraySize];
|
void *ShuffleArray[ShuffleArraySize];
|
||||||
u32 Count = 0;
|
u32 Count = 0;
|
||||||
const uptr AllocatedUser = NumberOfBlocks * Size;
|
const uptr AllocatedUser = Size * NumberOfBlocks;
|
||||||
for (uptr I = Region; I < Region + AllocatedUser; I += Size) {
|
for (uptr I = Region; I < Region + AllocatedUser; I += Size) {
|
||||||
ShuffleArray[Count++] = reinterpret_cast<void *>(I);
|
ShuffleArray[Count++] = reinterpret_cast<void *>(I);
|
||||||
if (Count == ShuffleArraySize) {
|
if (Count == ShuffleArraySize) {
|
||||||
|
@ -319,6 +319,11 @@ private:
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
DCHECK(B);
|
DCHECK(B);
|
||||||
|
if (!Sci->FreeList.empty()) {
|
||||||
|
Sci->FreeList.push_back(B);
|
||||||
|
B = Sci->FreeList.front();
|
||||||
|
Sci->FreeList.pop_front();
|
||||||
|
}
|
||||||
DCHECK_GT(B->getCount(), 0);
|
DCHECK_GT(B->getCount(), 0);
|
||||||
|
|
||||||
C->getStats().add(StatFree, AllocatedUser);
|
C->getStats().add(StatFree, AllocatedUser);
|
||||||
|
|
|
@ -187,6 +187,8 @@ private:
|
||||||
|
|
||||||
// Call map for user memory with at least this size.
|
// Call map for user memory with at least this size.
|
||||||
static const uptr MapSizeIncrement = 1UL << 17;
|
static const uptr MapSizeIncrement = 1UL << 17;
|
||||||
|
// Fill at most this number of batches from the newly map'd memory.
|
||||||
|
static const u32 MaxNumBatches = 8U;
|
||||||
|
|
||||||
struct RegionStats {
|
struct RegionStats {
|
||||||
uptr PoppedBlocks;
|
uptr PoppedBlocks;
|
||||||
|
@ -289,16 +291,18 @@ private:
|
||||||
C->getStats().add(StatMapped, UserMapSize);
|
C->getStats().add(StatMapped, UserMapSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
const uptr NumberOfBlocks = Min(
|
const u32 NumberOfBlocks = Min(
|
||||||
8UL * MaxCount, (Region->MappedUser - Region->AllocatedUser) / Size);
|
MaxNumBatches * MaxCount,
|
||||||
|
static_cast<u32>((Region->MappedUser - Region->AllocatedUser) / Size));
|
||||||
DCHECK_GT(NumberOfBlocks, 0);
|
DCHECK_GT(NumberOfBlocks, 0);
|
||||||
|
|
||||||
TransferBatch *B = nullptr;
|
TransferBatch *B = nullptr;
|
||||||
constexpr uptr ShuffleArraySize = 48;
|
constexpr u32 ShuffleArraySize =
|
||||||
|
MaxNumBatches * TransferBatch::MaxNumCached;
|
||||||
void *ShuffleArray[ShuffleArraySize];
|
void *ShuffleArray[ShuffleArraySize];
|
||||||
u32 Count = 0;
|
u32 Count = 0;
|
||||||
const uptr P = RegionBeg + Region->AllocatedUser;
|
const uptr P = RegionBeg + Region->AllocatedUser;
|
||||||
const uptr AllocatedUser = NumberOfBlocks * Size;
|
const uptr AllocatedUser = Size * NumberOfBlocks;
|
||||||
for (uptr I = P; I < P + AllocatedUser; I += Size) {
|
for (uptr I = P; I < P + AllocatedUser; I += Size) {
|
||||||
ShuffleArray[Count++] = reinterpret_cast<void *>(I);
|
ShuffleArray[Count++] = reinterpret_cast<void *>(I);
|
||||||
if (Count == ShuffleArraySize) {
|
if (Count == ShuffleArraySize) {
|
||||||
|
@ -314,6 +318,11 @@ private:
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
DCHECK(B);
|
DCHECK(B);
|
||||||
|
if (!Region->FreeList.empty()) {
|
||||||
|
Region->FreeList.push_back(B);
|
||||||
|
B = Region->FreeList.front();
|
||||||
|
Region->FreeList.pop_front();
|
||||||
|
}
|
||||||
DCHECK_GT(B->getCount(), 0);
|
DCHECK_GT(B->getCount(), 0);
|
||||||
|
|
||||||
C->getStats().add(StatFree, AllocatedUser);
|
C->getStats().add(StatFree, AllocatedUser);
|
||||||
|
|
Loading…
Reference in New Issue