[scudo][standalone] Minor optimization & improvements

Summary:
A few small improvements and optimizations:
- when refilling the free list, push back the last batch and return
  the front one: this allows to keep the allocations towards the front
  of the region;
- instead of using 48 entries in the shuffle array, use a multiple of
  `MaxNumCached`;
- make the maximum number of batches to create on refill a constant;
  ultimately it should be configurable, but that's for later;
- `initCache` doesn't need to zero out the cache, it's already done.
- it turns out that when using `||` or `&&`, the compiler is adamant
  about adding a short circuit for every part of the expression. Which
  ends up making somewhat annoying asm with lots of tests and
  conditional jumps. I am changing that to bitwise `|` or `&` in two
  places so that the generated code looks better. Added comments since
  it might feel weird to people.

This yields some small performance gains overall, nothing drastic
though.

Reviewers: hctim, morehouse, cferris, eugenis

Subscribers: #sanitizers, llvm-commits

Tags: #sanitizers, #llvm

Differential Revision: https://reviews.llvm.org/D70452
This commit is contained in:
Kostya Kortchinsky 2019-11-19 10:18:38 -08:00
parent bb775bee21
commit 46240c3872
3 changed files with 31 additions and 11 deletions

View File

@ -144,7 +144,10 @@ public:
TSDRegistryT *getTSDRegistry() { return &TSDRegistry; } TSDRegistryT *getTSDRegistry() { return &TSDRegistry; }
void initCache(CacheT *Cache) { Cache->init(&Stats, &Primary); } // The Cache must be provided zero-initialized.
void initCache(CacheT *Cache) {
Cache->initLinkerInitialized(&Stats, &Primary);
}
// Release the resources used by a TSD, which involves: // Release the resources used by a TSD, which involves:
// - draining the local quarantine cache to the global quarantine; // - draining the local quarantine cache to the global quarantine;
@ -161,7 +164,7 @@ public:
uptr Alignment = MinAlignment, uptr Alignment = MinAlignment,
bool ZeroContents = false) { bool ZeroContents = false) {
initThreadMaybe(); initThreadMaybe();
ZeroContents = ZeroContents || Options.ZeroContents; ZeroContents |= static_cast<bool>(Options.ZeroContents);
if (UNLIKELY(Alignment > MaxAlignment)) { if (UNLIKELY(Alignment > MaxAlignment)) {
if (Options.MayReturnNull) if (Options.MayReturnNull)
@ -181,12 +184,13 @@ public:
((Alignment > MinAlignment) ? Alignment : Chunk::getHeaderSize()); ((Alignment > MinAlignment) ? Alignment : Chunk::getHeaderSize());
// Takes care of extravagantly large sizes as well as integer overflows. // Takes care of extravagantly large sizes as well as integer overflows.
if (UNLIKELY(Size >= MaxAllowedMallocSize || COMPILER_CHECK(MaxAllowedMallocSize < UINTPTR_MAX - MaxAlignment);
NeededSize >= MaxAllowedMallocSize)) { if (UNLIKELY(Size >= MaxAllowedMallocSize)) {
if (Options.MayReturnNull) if (Options.MayReturnNull)
return nullptr; return nullptr;
reportAllocationSizeTooBig(Size, NeededSize, MaxAllowedMallocSize); reportAllocationSizeTooBig(Size, NeededSize, MaxAllowedMallocSize);
} }
DCHECK_LE(Size, NeededSize);
void *Block; void *Block;
uptr ClassId; uptr ClassId;
@ -541,7 +545,9 @@ private:
Chunk::UnpackedHeader NewHeader = *Header; Chunk::UnpackedHeader NewHeader = *Header;
// If the quarantine is disabled, the actual size of a chunk is 0 or larger // If the quarantine is disabled, the actual size of a chunk is 0 or larger
// than the maximum allowed, we return a chunk directly to the backend. // than the maximum allowed, we return a chunk directly to the backend.
const bool BypassQuarantine = !Quarantine.getCacheSize() || !Size || // Logical Or can be short-circuited, which introduces unnecessary
// conditional jumps, so use bitwise Or and let the compiler be clever.
const bool BypassQuarantine = !Quarantine.getCacheSize() | !Size |
(Size > Options.QuarantineMaxChunkSize); (Size > Options.QuarantineMaxChunkSize);
if (BypassQuarantine) { if (BypassQuarantine) {
NewHeader.State = Chunk::State::Available; NewHeader.State = Chunk::State::Available;

View File

@ -300,10 +300,10 @@ private:
const uptr NumberOfBlocks = RegionSize / Size; const uptr NumberOfBlocks = RegionSize / Size;
DCHECK_GT(NumberOfBlocks, 0); DCHECK_GT(NumberOfBlocks, 0);
TransferBatch *B = nullptr; TransferBatch *B = nullptr;
constexpr uptr ShuffleArraySize = 48; constexpr u32 ShuffleArraySize = 8U * TransferBatch::MaxNumCached;
void *ShuffleArray[ShuffleArraySize]; void *ShuffleArray[ShuffleArraySize];
u32 Count = 0; u32 Count = 0;
const uptr AllocatedUser = NumberOfBlocks * Size; const uptr AllocatedUser = Size * NumberOfBlocks;
for (uptr I = Region; I < Region + AllocatedUser; I += Size) { for (uptr I = Region; I < Region + AllocatedUser; I += Size) {
ShuffleArray[Count++] = reinterpret_cast<void *>(I); ShuffleArray[Count++] = reinterpret_cast<void *>(I);
if (Count == ShuffleArraySize) { if (Count == ShuffleArraySize) {
@ -319,6 +319,11 @@ private:
return nullptr; return nullptr;
} }
DCHECK(B); DCHECK(B);
if (!Sci->FreeList.empty()) {
Sci->FreeList.push_back(B);
B = Sci->FreeList.front();
Sci->FreeList.pop_front();
}
DCHECK_GT(B->getCount(), 0); DCHECK_GT(B->getCount(), 0);
C->getStats().add(StatFree, AllocatedUser); C->getStats().add(StatFree, AllocatedUser);

View File

@ -187,6 +187,8 @@ private:
// Call map for user memory with at least this size. // Call map for user memory with at least this size.
static const uptr MapSizeIncrement = 1UL << 17; static const uptr MapSizeIncrement = 1UL << 17;
// Fill at most this number of batches from the newly map'd memory.
static const u32 MaxNumBatches = 8U;
struct RegionStats { struct RegionStats {
uptr PoppedBlocks; uptr PoppedBlocks;
@ -289,16 +291,18 @@ private:
C->getStats().add(StatMapped, UserMapSize); C->getStats().add(StatMapped, UserMapSize);
} }
const uptr NumberOfBlocks = Min( const u32 NumberOfBlocks = Min(
8UL * MaxCount, (Region->MappedUser - Region->AllocatedUser) / Size); MaxNumBatches * MaxCount,
static_cast<u32>((Region->MappedUser - Region->AllocatedUser) / Size));
DCHECK_GT(NumberOfBlocks, 0); DCHECK_GT(NumberOfBlocks, 0);
TransferBatch *B = nullptr; TransferBatch *B = nullptr;
constexpr uptr ShuffleArraySize = 48; constexpr u32 ShuffleArraySize =
MaxNumBatches * TransferBatch::MaxNumCached;
void *ShuffleArray[ShuffleArraySize]; void *ShuffleArray[ShuffleArraySize];
u32 Count = 0; u32 Count = 0;
const uptr P = RegionBeg + Region->AllocatedUser; const uptr P = RegionBeg + Region->AllocatedUser;
const uptr AllocatedUser = NumberOfBlocks * Size; const uptr AllocatedUser = Size * NumberOfBlocks;
for (uptr I = P; I < P + AllocatedUser; I += Size) { for (uptr I = P; I < P + AllocatedUser; I += Size) {
ShuffleArray[Count++] = reinterpret_cast<void *>(I); ShuffleArray[Count++] = reinterpret_cast<void *>(I);
if (Count == ShuffleArraySize) { if (Count == ShuffleArraySize) {
@ -314,6 +318,11 @@ private:
return nullptr; return nullptr;
} }
DCHECK(B); DCHECK(B);
if (!Region->FreeList.empty()) {
Region->FreeList.push_back(B);
B = Region->FreeList.front();
Region->FreeList.pop_front();
}
DCHECK_GT(B->getCount(), 0); DCHECK_GT(B->getCount(), 0);
C->getStats().add(StatFree, AllocatedUser); C->getStats().add(StatFree, AllocatedUser);