forked from OSchip/llvm-project
[scudo][standalone] Minor optimization & improvements
Summary: A few small improvements and optimizations: - when refilling the free list, push back the last batch and return the front one: this keeps the allocations towards the front of the region; - instead of using 48 entries in the shuffle array, use a multiple of `MaxNumCached`; - make the maximum number of batches to create on refill a constant; ultimately it should be configurable, but that's for later; - `initCache` doesn't need to zero out the cache, it's already done; - it turns out that when using `||` or `&&`, the compiler is adamant about adding a short circuit for every part of the expression, which ends up producing somewhat annoying asm with lots of tests and conditional jumps. I am changing that to bitwise `|` or `&` in two places so that the generated code looks better. Added comments since it might feel weird to people. This yields some small performance gains overall, nothing drastic though. Reviewers: hctim, morehouse, cferris, eugenis Subscribers: #sanitizers, llvm-commits Tags: #sanitizers, #llvm Differential Revision: https://reviews.llvm.org/D70452
This commit is contained in:
parent
bb775bee21
commit
46240c3872
|
@ -144,7 +144,10 @@ public:
|
|||
|
||||
TSDRegistryT *getTSDRegistry() { return &TSDRegistry; }
|
||||
|
||||
void initCache(CacheT *Cache) { Cache->init(&Stats, &Primary); }
|
||||
// The Cache must be provided zero-initialized.
|
||||
void initCache(CacheT *Cache) {
|
||||
Cache->initLinkerInitialized(&Stats, &Primary);
|
||||
}
|
||||
|
||||
// Release the resources used by a TSD, which involves:
|
||||
// - draining the local quarantine cache to the global quarantine;
|
||||
|
@ -161,7 +164,7 @@ public:
|
|||
uptr Alignment = MinAlignment,
|
||||
bool ZeroContents = false) {
|
||||
initThreadMaybe();
|
||||
ZeroContents = ZeroContents || Options.ZeroContents;
|
||||
ZeroContents |= static_cast<bool>(Options.ZeroContents);
|
||||
|
||||
if (UNLIKELY(Alignment > MaxAlignment)) {
|
||||
if (Options.MayReturnNull)
|
||||
|
@ -181,12 +184,13 @@ public:
|
|||
((Alignment > MinAlignment) ? Alignment : Chunk::getHeaderSize());
|
||||
|
||||
// Takes care of extravagantly large sizes as well as integer overflows.
|
||||
if (UNLIKELY(Size >= MaxAllowedMallocSize ||
|
||||
NeededSize >= MaxAllowedMallocSize)) {
|
||||
COMPILER_CHECK(MaxAllowedMallocSize < UINTPTR_MAX - MaxAlignment);
|
||||
if (UNLIKELY(Size >= MaxAllowedMallocSize)) {
|
||||
if (Options.MayReturnNull)
|
||||
return nullptr;
|
||||
reportAllocationSizeTooBig(Size, NeededSize, MaxAllowedMallocSize);
|
||||
}
|
||||
DCHECK_LE(Size, NeededSize);
|
||||
|
||||
void *Block;
|
||||
uptr ClassId;
|
||||
|
@ -541,7 +545,9 @@ private:
|
|||
Chunk::UnpackedHeader NewHeader = *Header;
|
||||
// If the quarantine is disabled, the actual size of a chunk is 0 or larger
|
||||
// than the maximum allowed, we return a chunk directly to the backend.
|
||||
const bool BypassQuarantine = !Quarantine.getCacheSize() || !Size ||
|
||||
// Logical Or can be short-circuited, which introduces unnecessary
|
||||
// conditional jumps, so use bitwise Or and let the compiler be clever.
|
||||
const bool BypassQuarantine = !Quarantine.getCacheSize() | !Size |
|
||||
(Size > Options.QuarantineMaxChunkSize);
|
||||
if (BypassQuarantine) {
|
||||
NewHeader.State = Chunk::State::Available;
|
||||
|
|
|
@ -300,10 +300,10 @@ private:
|
|||
const uptr NumberOfBlocks = RegionSize / Size;
|
||||
DCHECK_GT(NumberOfBlocks, 0);
|
||||
TransferBatch *B = nullptr;
|
||||
constexpr uptr ShuffleArraySize = 48;
|
||||
constexpr u32 ShuffleArraySize = 8U * TransferBatch::MaxNumCached;
|
||||
void *ShuffleArray[ShuffleArraySize];
|
||||
u32 Count = 0;
|
||||
const uptr AllocatedUser = NumberOfBlocks * Size;
|
||||
const uptr AllocatedUser = Size * NumberOfBlocks;
|
||||
for (uptr I = Region; I < Region + AllocatedUser; I += Size) {
|
||||
ShuffleArray[Count++] = reinterpret_cast<void *>(I);
|
||||
if (Count == ShuffleArraySize) {
|
||||
|
@ -319,6 +319,11 @@ private:
|
|||
return nullptr;
|
||||
}
|
||||
DCHECK(B);
|
||||
if (!Sci->FreeList.empty()) {
|
||||
Sci->FreeList.push_back(B);
|
||||
B = Sci->FreeList.front();
|
||||
Sci->FreeList.pop_front();
|
||||
}
|
||||
DCHECK_GT(B->getCount(), 0);
|
||||
|
||||
C->getStats().add(StatFree, AllocatedUser);
|
||||
|
|
|
@ -187,6 +187,8 @@ private:
|
|||
|
||||
// Call map for user memory with at least this size.
|
||||
static const uptr MapSizeIncrement = 1UL << 17;
|
||||
// Fill at most this number of batches from the newly map'd memory.
|
||||
static const u32 MaxNumBatches = 8U;
|
||||
|
||||
struct RegionStats {
|
||||
uptr PoppedBlocks;
|
||||
|
@ -289,16 +291,18 @@ private:
|
|||
C->getStats().add(StatMapped, UserMapSize);
|
||||
}
|
||||
|
||||
const uptr NumberOfBlocks = Min(
|
||||
8UL * MaxCount, (Region->MappedUser - Region->AllocatedUser) / Size);
|
||||
const u32 NumberOfBlocks = Min(
|
||||
MaxNumBatches * MaxCount,
|
||||
static_cast<u32>((Region->MappedUser - Region->AllocatedUser) / Size));
|
||||
DCHECK_GT(NumberOfBlocks, 0);
|
||||
|
||||
TransferBatch *B = nullptr;
|
||||
constexpr uptr ShuffleArraySize = 48;
|
||||
constexpr u32 ShuffleArraySize =
|
||||
MaxNumBatches * TransferBatch::MaxNumCached;
|
||||
void *ShuffleArray[ShuffleArraySize];
|
||||
u32 Count = 0;
|
||||
const uptr P = RegionBeg + Region->AllocatedUser;
|
||||
const uptr AllocatedUser = NumberOfBlocks * Size;
|
||||
const uptr AllocatedUser = Size * NumberOfBlocks;
|
||||
for (uptr I = P; I < P + AllocatedUser; I += Size) {
|
||||
ShuffleArray[Count++] = reinterpret_cast<void *>(I);
|
||||
if (Count == ShuffleArraySize) {
|
||||
|
@ -314,6 +318,11 @@ private:
|
|||
return nullptr;
|
||||
}
|
||||
DCHECK(B);
|
||||
if (!Region->FreeList.empty()) {
|
||||
Region->FreeList.push_back(B);
|
||||
B = Region->FreeList.front();
|
||||
Region->FreeList.pop_front();
|
||||
}
|
||||
DCHECK_GT(B->getCount(), 0);
|
||||
|
||||
C->getStats().add(StatFree, AllocatedUser);
|
||||
|
|
Loading…
Reference in New Issue